In [None]:
from google.colab import drive
drive.mount('/content/drive')

# ================================================
# Cell 1 — Imports, anchors, weights, I/O helpers
# ================================================
from __future__ import annotations
import math
import os
import re
from typing import Dict, Literal, Optional

import numpy as np
import pandas as pd

# ----------------------------
# Normalisation anchors (log-10)
# ----------------------------
# Per your spec: span from ~8 hours of perfect-health loss to ~20 healthy years
DALY_MIN: float = 0.001   # lower anchor for log-normalisation
DALY_MAX: float = 20.0    # upper anchor for log-normalisation

def log_normalise(x: float, xmin: float = DALY_MIN, xmax: float = DALY_MAX) -> float:
    """
    Map a value x to [0, 1] on a log10 scale between xmin and xmax.

    Parameters
    ----------
    x : value to normalise (anything ``float()`` accepts).
    xmin : lower anchor; inputs at or below it map to 0.0 (this also avoids log10 of 0 / -inf).
    xmax : upper anchor; inputs at or above it map to 1.0.

    Returns
    -------
    float in [0, 1]. NaN input propagates as NaN.
    """
    # Clip at BOTH anchors: without the upper clip, x > xmax would give a score above 1.
    clipped = min(max(float(x), xmin), xmax)
    log_lo = math.log10(xmin)
    log_hi = math.log10(xmax)
    return (math.log10(clipped) - log_lo) / (log_hi - log_lo)


# ----------------------------
# Default weights (SIS framework)
# ----------------------------
# We only need B for now, but we keep the full set for later cells.
# Relative weight of each scoring component, keyed by its one-letter code.
# The five values below sum to 1.0.
DEFAULT_WEIGHTS: Dict[str, float] = {
    "B": 0.20,  # Disease-burden severity (per-case DALY)
    "P": 0.20,  # Potential beneficiaries
    "G": 0.30,  # Health gain per beneficiary
    "T": 0.20,  # Transformative potential
    "O": 0.10,  # Openness & prompt sharing
}

# ----------------------------
# Console I/O helpers (for later interactive steps)
# ----------------------------
def ask_float(prompt: str, default: Optional[float] = None,
              minv: Optional[float] = None, maxv: Optional[float] = None) -> float:
    """
    Prompt on the console until the user supplies an acceptable float.

    Parameters
    ----------
    prompt : text shown to the user (a "[default ...]" hint is appended when a default exists).
    default : value used when the user just presses Enter; None means an answer is required.
    minv, maxv : optional inclusive bounds; out-of-range answers are rejected and re-asked.

    Returns
    -------
    The accepted value as a float.
    """
    hint = f" [default {default}]" if default is not None else ""
    while True:
        s = input(f"{prompt}" + hint + ": ").strip()
        if not s and default is not None:
            val = float(default)
        else:
            try:
                val = float(s)
            except ValueError:
                print("  Please enter a number.")
                continue
            # float("nan") parses fine, but NaN compares False against every bound,
            # so it would slip through the range checks below.
            if math.isnan(val):
                print("  Please enter a number.")
                continue
        if minv is not None and val < minv:
            print(f"  Must be ≥ {minv}.")
            continue
        if maxv is not None and val > maxv:
            print(f"  Must be ≤ {maxv}.")
            continue
        return val

def ask_int(prompt: str, default: Optional[int] = None,
            minv: Optional[int] = None, maxv: Optional[int] = None) -> int:
    """
    Keep asking on the console until an integer within the optional bounds is given.

    An empty answer selects ``default`` when one is provided; ``minv`` and ``maxv``
    are inclusive limits. Returns the accepted integer.
    """
    while True:
        hint = "" if default is None else f" [default {default}]"
        reply = input(f"{prompt}" + hint + ": ").strip()

        take_default = (not reply) and default is not None
        if take_default:
            number = int(default)
        else:
            try:
                number = int(reply)
            except ValueError:
                print("  Please enter an integer.")
                continue

        below_floor = minv is not None and number < minv
        if below_floor:
            print(f"  Must be ≥ {minv}.")
            continue

        above_ceiling = maxv is not None and number > maxv
        if above_ceiling:
            print(f"  Must be ≤ {maxv}.")
            continue

        return number

def yesno(prompt: str, default: str = "y") -> bool:
    """
    Ask a yes/no question on the console and return the answer as a bool.

    ``default`` ('y' or 'n') is used when the user just presses Enter; pass an
    empty value to force an explicit answer. Accepts y/yes/n/no in any case.
    """
    verdicts = {"y": True, "yes": True, "n": False, "no": False}
    fallback = default.lower() if default else None
    hint = f" (default {fallback})" if fallback else ""
    question = f"{prompt} [y/n]" + hint + ": "

    while True:
        reply = input(question).strip().lower()
        if reply == "" and fallback:
            return fallback == "y"
        if reply in verdicts:
            return verdicts[reply]
        print("  Please answer y or n.")

# ----------------------------
# Small utilities we'll reuse
# ----------------------------
def safe_div(num: float, den: float) -> float:
    """Divide num by den as floats, yielding 0.0 when den is None, zero, or NaN."""
    unusable_denominator = den is None or den == 0 or np.isnan(den)
    return 0.0 if unusable_denominator else float(num) / float(den)

def slugify_cols(df: pd.DataFrame) -> pd.DataFrame:
    """
    Lowercase + replace whitespace with underscores in column names.

    Leading/trailing whitespace is stripped and each internal whitespace run
    becomes a single underscore. Non-string labels (e.g. an integer year used
    as a header) are converted with ``str()`` first instead of raising.

    Returns a *copy* with normalised column names; the input frame is untouched.
    """
    out = df.copy()
    # str(c): column labels are not guaranteed to be strings.
    out.columns = [re.sub(r"\s+", "_", str(c).strip().lower()) for c in out.columns]
    return out

def standardise_measure_key(measure_name: str) -> str:
    """
    Reduce a GBD measure label to a short lowercase key.

    Examples:
      'DALYs (Disability-Adjusted...)' -> 'daly'
      'YLLs (Years of Life Lost)'     -> 'yll'
      'YLDs (Years Lived with...)'    -> 'yld'
      'Incidence'                     -> 'incidence'
    """
    spelled_out = (
        ("disability-adjusted life years", "daly"),
        ("years of life lost", "yll"),
        ("years lived with disability", "yld"),
    )
    key = str(measure_name).strip().lower()
    # Remove any "(...)" explanation together with the whitespace before it.
    key = re.sub(r"\s*\(.*?\)", "", key)
    for long_form, short_form in spelled_out:
        key = key.replace(long_form, short_form)
    # Drop plural endings: ylls -> yll, ylds -> yld, dalys -> daly.
    return key.rstrip("s")




Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
# ================================================
# Cell 2 — GBD Results loader & per-case DALY computation
# ================================================

from typing import Optional


def load_gbd_results_csv(path: str) -> pd.DataFrame:
    """
    Read a GBD Results CSV export and bring its columns into canonical form.

    Column names are slugified, a few known alternative spellings are renamed
    (year_id -> year, mean/value -> val, upper_ci -> upper, lower_ci -> lower),
    text columns are cast to str and numeric columns coerced with
    ``pd.to_numeric(errors="coerce")``.

    The result has at least:
      measure_name, metric_name, cause_name, location_name, sex_name, age_name, val
    (year/lower/upper are coerced only when present.)

    Raises ValueError when any required column is absent.
    """
    table = slugify_cols(pd.read_csv(path))

    # canonical name -> alternative spellings, in order of preference
    alias_table = (
        ("year", ("year_id",)),
        ("val", ("mean", "value")),
        ("upper", ("upper_ci",)),
        ("lower", ("lower_ci",)),
    )
    renames = {}
    for canonical, alternatives in alias_table:
        if canonical in table.columns:
            continue  # already canonical, leave alternatives untouched
        for candidate in alternatives:
            if candidate in table.columns:
                renames[candidate] = canonical
                break
    table = table.rename(columns=renames)

    text_cols = ["measure_name", "metric_name", "cause_name", "location_name", "sex_name", "age_name"]
    absent = [name for name in text_cols + ["val"] if name not in table.columns]
    if absent:
        raise ValueError(f"Missing required columns in CSV: {absent}")

    # Enforce dtypes: labels as strings, measurements as numbers
    for name in text_cols:
        table[name] = table[name].astype(str)
    for name in ("val", "lower", "upper", "year"):
        if name in table.columns:
            table[name] = pd.to_numeric(table[name], errors="coerce")

    return table


def compute_per_case_burden(
    df: pd.DataFrame,
    disease: str,
    year: Optional[int] = None,
    location: str = "Global",
    sex: str = "Both",
    age: str = "All Ages",
    metric: str = "Number",
) -> pd.DataFrame:
    """
    Derive per-case burden figures from the raw (normalised) GBD table.

    Rows are selected where cause_name contains ``disease`` (case-insensitive)
    and location/sex/age/metric equal the given values (case-insensitive),
    optionally restricted to ``year``. Values are summed per measure, then:

        DALY_per_case = DALYs / Incidence  (DALYs rebuilt as YLLs + YLDs if absent)
        YLL_per_case  = YLLs / Incidence
        YLD_per_case  = YLDs / Incidence

    Returns a one-row DataFrame holding the totals, the per-case metrics and
    the log-normalised B score. Raises ValueError when nothing matches or when
    incidence / DALYs cannot be obtained.
    """
    def _same(column: str, wanted: str) -> pd.Series:
        return df[column].str.lower().eq(wanted.lower())

    selector = (
        df["cause_name"].str.contains(disease, case=False, na=False)
        & _same("location_name", location)
        & _same("sex_name", sex)
        & _same("age_name", age)
        & _same("metric_name", metric)
    )
    if year is not None and "year" in df.columns:
        selector &= (df["year"] == year)

    sub = df.loc[selector].copy()
    if sub.empty:
        raise ValueError(
            "No rows matched your filters. Check disease string, year, and that your CSV includes Global/Both/All Ages/Number."
        )

    # Collapse measure labels to daly/yll/yld/incidence and total each one
    # (this sums over any remaining strata such as sub-causes).
    sub["measure_key"] = sub["measure_name"].map(standardise_measure_key)
    per_measure = sub.groupby("measure_key")["val"].sum()
    totals = {key: float(value) for key, value in per_measure.items()}

    total_inc = totals.get("incidence")
    total_dalys = totals.get("daly")
    total_yll = totals.get("yll")
    total_yld = totals.get("yld")

    # DALYs can be rebuilt from their two components when not exported directly
    have_components = total_yll is not None and total_yld is not None
    if total_dalys is None and have_components:
        total_dalys = total_yll + total_yld

    if total_inc is None:
        raise ValueError(
            "Incidence not found for the chosen filters; export it from the Results Tool and try again."
        )
    if total_dalys is None:
        raise ValueError(
            "DALYs (or YLLs and YLDs) not found for the chosen filters; export them and try again."
        )

    daly_per_case = safe_div(total_dalys, total_inc)
    yll_per_case = None if total_yll is None else safe_div(total_yll, total_inc)
    yld_per_case = None if total_yld is None else safe_div(total_yld, total_inc)

    # Report the requested year, else the most frequent year among matched rows
    if year is not None:
        year_reported = int(year)
    elif "year" in sub.columns:
        year_reported = int(sub["year"].mode()[0])
    else:
        year_reported = None

    record = {
        "Disease": disease,
        "Year": year_reported,
        "Location": location,
        "Sex": sex,
        "Age": age,
        "Metric": metric,
        "Incidence_total": total_inc,
        "DALYs_total": total_dalys,
        "YLLs_total": total_yll,
        "YLDs_total": total_yld,
        "DALY_per_case": daly_per_case,
        "YLL_per_case": yll_per_case,
        "YLD_per_case": yld_per_case,
        "B_score": log_normalise(daly_per_case),
    }
    return pd.DataFrame([record])

# ================================================
# Cell 2a — FAST pre-aggregation (one-time) & fast compute
# ================================================

import os


def build_fast_lookup(
    df: pd.DataFrame,
    location: str = "Global",
    sex: str = "Both",
    age: str = "All Ages",
    metric: Optional[str] = None,  # None = keep all metrics (Number, Rate, Percent)
    out_path: str = "gbd_fast.parquet",
) -> str:
    """Filter to one location/sex/age stratum, aggregate, and persist a compact lookup.

    The metric is kept as its own key so that values reported as Number, Rate
    or Percent stay in separate rows of the result.

    Result columns: [cause_name, year, measure_key, metric_key, val].
    Written as Parquet; if that fails for any reason a CSV with the same stem
    is written instead. Returns the path actually written.
    """
    def _metric_key(label: str) -> str:
        # Collapse a metric label to 'number' / 'rate' / 'percent' when recognisable
        text = str(label).strip().lower()
        for token in ("number", "rate"):
            if token in text:
                return token
        if "percent" in text or "%" in text:
            return "percent"
        return text

    keep = (
        df["location_name"].str.lower().eq(location.lower())
        & df["sex_name"].str.lower().eq(sex.lower())
        & df["age_name"].str.lower().eq(age.lower())
    )
    if metric:  # optional; the default keeps every metric
        keep = keep & df["metric_name"].str.lower().eq(metric.lower())

    wanted = ["cause_name", "year", "measure_name", "metric_name", "val"]
    rows = df.loc[keep, wanted].copy()
    rows["measure_key"] = rows["measure_name"].map(standardise_measure_key)
    rows["metric_key"] = rows["metric_name"].map(_metric_key)

    group_cols = ["cause_name", "year", "measure_key", "metric_key"]
    lookup = rows.groupby(group_cols, as_index=False)["val"].sum()

    # Persist: Parquet first, CSV as the fallback
    try:
        lookup.to_parquet(out_path, index=False)
    except Exception:
        fallback = out_path.replace(".parquet", ".csv")
        lookup.to_csv(fallback, index=False)
        return fallback
    return out_path


def load_fast_lookup(path: str = "gbd_fast.parquet") -> pd.DataFrame:
    """Load the compact lookup table saved by build_fast_lookup().

    Resolution order:
      1. ``path`` itself (read as Parquet when it ends in ``.parquet``, else as CSV);
      2. the CSV sibling obtained by replacing ``.parquet`` with ``.csv``.

    Raises
    ------
    FileNotFoundError
        Neither ``path`` nor its CSV sibling exists.
    Exception
        The Parquet file exists but cannot be read and no CSV sibling is
        available; the original read error is re-raised.
    """
    alt = path.replace(".parquet", ".csv")
    parquet_error: Optional[Exception] = None

    if os.path.exists(path):
        if not path.endswith(".parquet"):
            return pd.read_csv(path)
        try:
            return pd.read_parquet(path)
        except Exception as exc:
            # Do NOT retry this file with read_csv: it is a Parquet file and would
            # either fail to parse or be misread as a CSV. Try the CSV sibling instead.
            parquet_error = exc

    if alt != path and os.path.exists(alt):
        return pd.read_csv(alt)

    if parquet_error is not None:
        raise parquet_error
    raise FileNotFoundError(f"Fast lookup not found at {path} or {alt}.")


def compute_per_case_burden_fast(
    agg: pd.DataFrame,
    disease: str,
    year: Optional[int] = None,
    exact: bool = True,
) -> pd.DataFrame:
    """
    Fast per-case burden for a single cause, matched by exact (case-insensitive) name.

    Parameters
    ----------
    agg : fast lookup table with columns cause_name, year, measure_key, val and,
        when built with metrics preserved, metric_key.
    disease : exact cause name to look up.
    year : optional year filter; None uses every year present for the cause.
    exact : accepted for interface compatibility; matching is always exact here.

    Returns
    -------
    One-row DataFrame with totals, per-case metrics, provenance columns and B_score.

    Raises
    ------
    ValueError
        No exact match (substring matches or name suggestions are printed first),
        or incidence / DALYs (or YLL+YLD) are unavailable for the cause.

    Notes
    -----
    When the table carries a ``metric_key`` column, only ``number`` rows are
    totalled. The lookup may also hold Rate and Percent rows for the same
    measure; adding those to counts would mix units.
    """
    src = agg

    # Exact match
    mask = src["cause_name"].str.lower().eq(disease.lower())
    if year is not None:
        mask &= (src["year"] == year)
    sub = src.loc[mask]

    # Fallback: show suggestions then error (to avoid ambiguity here)
    if sub.empty:
        submask = src["cause_name"].str.contains(disease, case=False, na=False)
        if year is not None:
            submask &= (src["year"] == year)
        sub2 = src.loc[submask]
        if sub2.empty:
            pool = src if year is None else src[src["year"] == year]
            q = disease.lower()
            def score(name: str) -> int:
                # Number of alphanumeric query tokens that occur in the cause name
                name_l = str(name).lower()
                return sum(1 for t in re.findall(r"[a-z0-9]+", q) if t and t in name_l)
            sugg = (pool[["cause_name"]]
                      .drop_duplicates()
                      .assign(_s=lambda d: d["cause_name"].map(score))
                      .sort_values(["_s","cause_name"], ascending=[False, True])
                      .head(15))
            print("No rows for that cause/year in the fast table.")
            try:
                from IPython.display import display
                display(sugg.drop(columns=["_s"]))
            except Exception:
                print(sugg.drop(columns=["_s"]).to_string(index=False))
            raise ValueError("No rows matched. Pick an exact cause (or use the 'contains' function).")
        else:
            # Substring matches exist → list them & error
            distinct = sub2["cause_name"].drop_duplicates().tolist()
            print("Your query matched multiple causes via substring:")
            for nm in distinct[:20]:
                print("  -", nm)
            raise ValueError("Multiple matches. Choose an exact cause name from above or use 'contains'.")

    # Totals are counts: keep only Number rows when the metric is recorded.
    counts = sub
    if "metric_key" in sub.columns:
        counts = sub.loc[sub["metric_key"].eq("number")]

    def get_total(key: str) -> Optional[float]:
        # A zero or missing sum is treated as "measure not available"
        s = counts.loc[counts["measure_key"].eq(key), "val"].sum()
        return float(s) if pd.notnull(s) and s != 0 else None

    total_inc = get_total("incidence")
    total_dalys = get_total("daly")
    total_yll = get_total("yll")
    total_yld = get_total("yld")

    # Reconstruct DALYs if only YLL & YLD are present
    if total_dalys is None and (total_yll is not None and total_yld is not None):
        total_dalys = total_yll + total_yld
    if total_inc is None or total_dalys is None:
        raise ValueError("Need Incidence and DALYs (or YLL+YLD) for this cause/year in the fast table.")

    daly_per_case = safe_div(total_dalys, total_inc)
    yll_per_case = safe_div(total_yll, total_inc) if total_yll is not None else None
    yld_per_case = safe_div(total_yld, total_inc) if total_yld is not None else None

    chosen_cause = str(sub["cause_name"].iloc[0])

    return pd.DataFrame([{
        "Disease": disease,
        "Year": int(year) if year is not None else (int(sub["year"].mode()[0]) if "year" in sub.columns else None),
        "Location": "Global",
        "Sex": "Both",
        "Age": "All Ages",
        "Metric": "Number",

        # Provenance of incidence
        "Incidence_mode": "exact",
        "Incidence_chosen_cause": chosen_cause,
        "Incidence_cause_names": chosen_cause,
        "Cause_names_all": chosen_cause,
        "Contains_family_size": 1,

        "Incidence_total": total_inc,
        "DALYs_total": total_dalys,
        "YLLs_total": total_yll,
        "YLDs_total": total_yld,
        "DALY_per_case": daly_per_case,
        "YLL_per_case": yll_per_case,
        "YLD_per_case": yld_per_case,
        "B_score": log_normalise(daly_per_case),
    }])


In [None]:
# ================================================
# Cell 2b — COMPOSITE 'contains' computation (sum all causes matching substring)
# ================================================
"""
Use this when you want to aggregate over a *family* of causes whose labels all contain
some substring, e.g., all causes that include "hepatitis c". By default, the function
**sums DALYs** across all matches. For **incidence**, you can choose:
  - incidence_mode='sum'        → sum incidence over all matches (simple but may overcount if causes are outcomes on different pathways)
  - incidence_mode='acute_only' → pick incidence only from causes whose names match an acute regex (e.g., r"acute hepatitis c"), else fallback to 'sum'
  - incidence_mode='largest'    → pick the single matching cause with the largest incidence
  - incidence_mode='named'      → pick incidence from a specific cause name (pass incidence_name)
"""

def compute_per_case_burden_contains(
    agg: pd.DataFrame,
    substring: str,
    year: Optional[int] = None,
    incidence_mode: Literal['sum','acute_only','largest','named'] = 'sum',
    acute_regex: str = r"acute hepatitis c",
    incidence_name: Optional[str] = None,
) -> pd.DataFrame:
    """
    Per-case burden for the family of causes whose name contains ``substring``.

    DALYs, YLLs and YLDs are summed over the whole family. Incidence is chosen
    according to ``incidence_mode``:
      - 'sum'        : summed over every matched cause;
      - 'acute_only' : summed over causes whose name matches ``acute_regex``;
                       falls back to 'sum' (with a printed notice) if that gives nothing;
      - 'largest'    : taken from the single cause with the largest incidence;
      - 'named'      : taken from the cause named ``incidence_name`` (case-insensitive).

    Parameters
    ----------
    agg : fast lookup table (cause_name, year, measure_key, val, optionally metric_key).
    substring : pattern passed to ``str.contains`` on cause_name (case-insensitive).
    year : optional year filter.

    Returns
    -------
    One-row DataFrame with totals, per-case metrics, incidence provenance and B_score.

    Raises
    ------
    ValueError
        Nothing matched, the mode is unknown, 'named' lacks ``incidence_name``,
        or incidence / DALYs (or YLL+YLD) cannot be obtained.

    Notes
    -----
    When the table carries a ``metric_key`` column, only ``number`` rows enter
    any total. The lookup may also contain Rate and Percent rows for the same
    measure; adding them to counts would mix units.
    """
    src = agg
    m = src["cause_name"].str.contains(substring, case=False, na=False)
    if year is not None:
        m &= (src["year"] == year)
    fam = src.loc[m].copy()
    if fam.empty:
        raise ValueError("No causes matched the substring/year in the fast table.")

    all_causes = sorted(fam["cause_name"].dropna().unique().tolist())

    # Totals are counts: keep only Number rows when the metric is recorded.
    counts = fam.loc[fam["metric_key"].eq("number")] if "metric_key" in fam.columns else fam

    # Sum a measure across the whole family (zero/missing → None)
    def fam_total(key: str) -> Optional[float]:
        s = counts.loc[counts["measure_key"].eq(key), "val"].sum()
        return float(s) if pd.notnull(s) and s != 0 else None

    daly_sum = fam_total("daly")
    yll_sum = fam_total("yll")
    yld_sum = fam_total("yld")
    if daly_sum is None and (yll_sum is not None and yld_sum is not None):
        daly_sum = yll_sum + yld_sum

    # Incidence selection + provenance
    inc_total = None
    incidence_cause_names: list[str] = []
    chosen_label: Optional[str] = None

    if incidence_mode == 'sum':
        inc_total = fam_total("incidence")
        incidence_cause_names = all_causes

    elif incidence_mode == 'acute_only':
        amask = counts["cause_name"].str.contains(acute_regex, case=False, na=False)
        inc_total = counts.loc[amask & counts["measure_key"].eq("incidence"), "val"].sum()
        incidence_cause_names = sorted(counts.loc[amask, "cause_name"].dropna().unique().tolist())
        if (not pd.notnull(inc_total)) or inc_total == 0:
            print("[contains] acute_only incidence not found; falling back to sum across matches.")
            inc_total = fam_total("incidence")
            incidence_cause_names = all_causes

    elif incidence_mode == 'largest':
        sub_inc = (counts[counts["measure_key"].eq("incidence")]
                   .groupby("cause_name", as_index=False)["val"].sum()
                   .sort_values("val", ascending=False))
        if not sub_inc.empty:
            inc_total = float(sub_inc.iloc[0]["val"])
            chosen_label = str(sub_inc.iloc[0]["cause_name"])
            incidence_cause_names = [chosen_label]
            print(f"[contains] Using largest-incidence cause: {chosen_label}")

    elif incidence_mode == 'named':
        if not incidence_name:
            raise ValueError("incidence_mode='named' requires incidence_name to be provided.")
        nm = counts[counts["cause_name"].str.lower().eq(incidence_name.lower()) & counts["measure_key"].eq("incidence")]
        inc_total = float(nm["val"].sum()) if not nm.empty else None
        incidence_cause_names = [incidence_name] if inc_total is not None else []
        chosen_label = incidence_name if inc_total is not None else None

    else:
        raise ValueError("Unknown incidence_mode.")

    if inc_total is None or daly_sum is None:
        raise ValueError("Need Incidence and DALYs (or YLL+YLD) for the matched family.")

    daly_per_case = safe_div(daly_sum, inc_total)
    yll_pc = safe_div(yll_sum, inc_total) if yll_sum is not None else None
    yld_pc = safe_div(yld_sum, inc_total) if yld_sum is not None else None

    return pd.DataFrame([{
        "Disease": f"contains: {substring}",
        "Year": int(year) if year is not None else (int(fam["year"].mode()[0]) if "year" in fam.columns else None),
        "Location": "Global",
        "Sex": "Both",
        "Age": "All Ages",
        "Metric": "Number",

        # Provenance of incidence
        "Incidence_mode": incidence_mode,
        "Incidence_chosen_cause": chosen_label,
        "Incidence_cause_names": "; ".join(incidence_cause_names) if incidence_cause_names else None,
        "Cause_names_all": "; ".join(all_causes),
        "Contains_family_size": len(all_causes),

        "Incidence_total": inc_total,
        "DALYs_total": daly_sum,
        "YLLs_total": yll_sum,
        "YLDs_total": yld_sum,
        "DALY_per_case": daly_per_case,
        "YLL_per_case": yll_pc,
        "YLD_per_case": yld_pc,
        "B_score": log_normalise(daly_per_case),
    }])

In [None]:
# ================================================
# Cell 3 — Runner & quick cause browser (fast path if available)
# ================================================

def list_causes(
    df: pd.DataFrame | None = None,
    agg: pd.DataFrame | None = None,
    year: Optional[int] = None,
    location: str = "Global",
    sex: str = "Both",
    age: str = "All Ages",
    metric: str = "Number",
    limit: int = 200,
) -> pd.DataFrame:
    """Return a preview of distinct causes available under the chosen strata."""
    if agg is not None:
        m = (agg["year"].eq(year)) if year is not None else pd.Series(True, index=agg.index)
        causes = (agg.loc[m, ["cause_name"]].dropna().drop_duplicates().sort_values("cause_name"))
        return causes.head(limit)

    assert df is not None, "Provide either df or agg"
    mask = (
        df["location_name"].astype(str).str.lower().eq(location.lower())
        & df["sex_name"].astype(str).str.lower().eq(sex.lower())
        & df["age_name"].astype(str).str.lower().eq(age.lower())
        & df["metric_name"].astype(str).str.lower().eq(metric.lower())
    )
    if year is not None and "year" in df.columns:
        mask &= (df["year"] == year)
    causes = df.loc[mask, ["cause_name"]].dropna().drop_duplicates().sort_values("cause_name")
    return causes.head(limit)

# --- Interactive runner ---

# Location of the cached fast lookup table (written by build_fast_lookup,
# read back by load_fast_lookup).
FAST_PATH = "gbd_fast.parquet"

# Default CSV path (your Drive export); blanked below if the file is not there,
# so the prompt then asks for an explicit path.
_default_csv = "/content/drive/MyDrive/Colab Notebooks/SIS/IHME-GBD_2021_DATA-6ec784b1-1.csv"
try:
    import os
    if not os.path.exists(_default_csv):
        _default_csv = ""
except Exception:
    _default_csv = ""

# Empty answer → default path; no default and no answer → stop the cell.
csv_path = input(f"Path to GBD Results CSV [default {_default_csv or 'enter path'}]: ").strip() or _default_csv
if not csv_path:
    raise SystemExit("Please provide a CSV path (exported from VizHub GBD Results).")

# Load & normalise the raw export
_df = load_gbd_results_csv(csv_path)
print(f"Loaded GBD CSV with {_df.shape[0]} rows and {_df.shape[1]} columns.")

# Build/refresh fast lookup (optional; if declined, an existing file at FAST_PATH is reused as-is)
if yesno("Build/refresh fast lookup table (recommended)?", default="y"):
    fast_file = build_fast_lookup(_df, out_path=FAST_PATH)
    print(f"Fast table saved to: {fast_file}")

# Load fast table if present; `agg` stays None when loading fails,
# which later steps use as the signal to fall back to the raw table.
agg = None
try:
    agg = load_fast_lookup(FAST_PATH)
    print(f"Loaded fast table with {agg.shape[0]} rows.")
except Exception:
    print("Fast table not found — using raw CSV path (slower).")

# Optional: browse all causes (by year)
if yesno("List available causes?", default="n"):
    yr_txt = input("Year to filter (blank = all years in file): ").strip()
    # NOTE: a non-numeric answer raises ValueError here (no retry loop).
    yr = int(yr_txt) if yr_txt else None
    # Prefer the fast table; pass the raw frame only when no fast table was loaded.
    preview = list_causes(_df if agg is None else None, agg=agg, year=yr)
    try:
        from IPython.display import display
        display(preview)
    except Exception:
        # Plain-text fallback when rich display is unavailable
        print(preview.to_string(index=False))

# Choose disease & year for computation (empty disease → "Hepatitis C")
_disease = input('Disease string (e.g., "Hepatitis C"): ').strip() or "Hepatitis C"
yr_txt = input("Year for computation (e.g., 2021). Leave blank to use all years in file: ").strip()
# NOTE: a non-numeric answer raises ValueError here as well.
_year = int(yr_txt) if yr_txt else None

# Ask whether to aggregate by substring first
use_contains = yesno("Use 'contains' aggregation (sum family of causes matching your string)?", default="y")

# If using contains and we have a fast table, show which causes match BEFORE choosing mode.
# `fam` holds the matched rows and `preview` the indexed summary table; both stay None
# when this step is skipped, and the mode-selection step checks for that.
fam = None
preview = None
if agg is not None and use_contains:
    fam_mask = agg["cause_name"].str.contains(_disease, case=False, na=False)
    if _year is not None:
        fam_mask &= agg["year"].eq(_year)
    fam = agg.loc[fam_mask].copy()

    if fam.empty:
        print("No causes matched your string/year in the fast table.")
    else:
        # Build compact preview: incidence (Number / Rate / Percent) and DALYs
        inc_tbl = fam[fam["measure_key"].eq("incidence")].groupby(
            ["cause_name", "metric_key"], as_index=False
        )["val"].sum()
        inc_wide = inc_tbl.pivot(index="cause_name", columns="metric_key", values="val").reset_index()
        inc_wide = inc_wide.rename(
            columns={
                "number": "incidence_number",
                "rate": "incidence_rate_per_100k",
                "percent": "incidence_percent",
            }
        )
        # DALYs are shown as counts: restrict to Number rows so that Rate and
        # Percent rows of the same measure are not added into the total.
        daly_tbl = (
            fam[fam["measure_key"].eq("daly") & fam["metric_key"].eq("number")]
            .groupby("cause_name", as_index=False)["val"]
            .sum()
            .rename(columns={"val": "DALYs"})
        )
        preview = inc_wide.merge(daly_tbl, on="cause_name", how="left")

        # Sort by incidence_number if present, else by DALYs
        if "incidence_number" in preview.columns:
            preview = preview.sort_values("incidence_number", ascending=False)
        elif "DALYs" in preview.columns:
            preview = preview.sort_values("DALYs", ascending=False)

        # Stable 0..n-1 index column that the user refers to when picking causes
        preview = preview.fillna(0.0).reset_index(drop=True)
        preview.insert(0, "idx", range(len(preview)))

        print("Matched causes and their incidence metrics for your filters (up to 30):")
        cols = ["idx", "cause_name", "incidence_number", "incidence_percent", "incidence_rate_per_100k", "DALYs"]
        cols = [c for c in cols if c in preview.columns]
        print(preview[cols].head(30).to_string(index=False))
        input("\nPress Enter to continue to incidence-mode selection...")

# Now choose incidence mode and run the computation
if agg is not None and use_contains:
    print("Incidence modes: 1) sum, 2) acute_only, 3) largest, 4) list_numbers")
    mode_i = ask_int("Choose incidence mode [1/2/3/4]", default=2, minv=1, maxv=4)

    if mode_i in (1, 2, 3):
        # Modes 1-3 are handled by the shared 'contains' function
        modes = {1: "sum", 2: "acute_only", 3: "largest"}
        sel_mode = modes[mode_i]
        out = compute_per_case_burden_contains(
            agg,
            substring=_disease,
            year=_year,
            incidence_mode=sel_mode,
            acute_regex=r"acute hepatitis c",
            incidence_name=None,
        )
    else:
        # --- Mode 4: user picks indices of causes to include ---
        if fam is None or preview is None or preview.empty:
            raise ValueError("No matched causes to select from. Try a different disease string or year.")
        idx_str = input("Enter cause indices separated by spaces (e.g., '0 1 2'): ").strip()
        try:
            idx_list = [int(x) for x in idx_str.split() if x.strip() != ""]
        except ValueError:
            raise ValueError("Please enter integers separated by spaces, e.g., '0 1 2'.")

        if not idx_list:
            raise ValueError("You must select at least one index.")

        max_idx = int(preview["idx"].max())
        bad = [i for i in idx_list if i < 0 or i > max_idx]
        if bad:
            raise ValueError(f"Index out of range: {bad}. Allowed range is 0..{max_idx}.")

        selected_names = preview.loc[preview["idx"].isin(idx_list), "cause_name"].tolist()
        if not selected_names:
            raise ValueError("No valid causes selected.")

        fam_sel = fam[fam["cause_name"].isin(selected_names)].copy()

        # Sum each measure across the selected causes using NUMBER rows only.
        # The fast table may also hold Rate and Percent rows for the same measure;
        # adding those to counts would mix units, for DALYs/YLLs/YLDs as much as for incidence.
        def sel_sum(meas_key: str) -> Optional[float]:
            m = fam_sel["measure_key"].eq(meas_key) & fam_sel["metric_key"].eq("number")
            s = fam_sel.loc[m, "val"].sum()
            return float(s) if pd.notnull(s) and s != 0 else None

        inc_total = sel_sum("incidence")
        daly_sum = sel_sum("daly")
        yll_sum = sel_sum("yll")
        yld_sum = sel_sum("yld")
        # Reconstruct DALYs if only YLL & YLD are present
        if daly_sum is None and (yll_sum is not None and yld_sum is not None):
            daly_sum = yll_sum + yld_sum

        if inc_total is None or daly_sum is None:
            raise ValueError("Need Incidence NUMBER and DALYs (or YLL+YLD) for the selected causes.")

        daly_per_case = safe_div(daly_sum, inc_total)
        yll_pc = safe_div(yll_sum, inc_total) if yll_sum is not None else None
        yld_pc = safe_div(yld_sum, inc_total) if yld_sum is not None else None

        all_causes = sorted(fam["cause_name"].dropna().unique().tolist())
        year_used = int(_year) if _year is not None else (int(fam["year"].mode()[0]) if "year" in fam.columns else None)

        out = pd.DataFrame([{
            "Disease": f"contains: {_disease}",
            "Year": year_used,
            "Location": "Global",
            "Sex": "Both",
            "Age": "All Ages",
            "Metric": "Number",
            "Incidence_mode": "list_numbers",
            "Incidence_chosen_cause": "; ".join(selected_names),
            "Incidence_cause_names": "; ".join(selected_names),
            "Cause_names_all": "; ".join(all_causes),
            "Contains_family_size": len(all_causes),
            "Incidence_total": inc_total,
            "DALYs_total": daly_sum,
            "YLLs_total": yll_sum,
            "YLDs_total": yld_sum,
            "DALY_per_case": daly_per_case,
            "YLL_per_case": yll_pc,
            "YLD_per_case": yld_pc,
            "B_score": log_normalise(daly_per_case),
        }])

elif agg is not None:
    # Fast table available, exact cause name requested
    out = compute_per_case_burden_fast(agg, disease=_disease, year=_year, exact=True)
else:
    # Raw fallback (slower)
    out = compute_per_case_burden(
        _df,
        disease=_disease,
        year=_year,
        location="Global",
        sex="Both",
        age="All Ages",
        metric="Number",
    )

# Show & save
try:
    from IPython.display import display
    display(out)
except Exception:
    print(out.to_string(index=False))

out.to_csv("B_severity_from_GBD.csv", index=False)
print("Saved: B_severity_from_GBD.csv")


Path to GBD Results CSV [default /content/drive/MyDrive/Colab Notebooks/SIS/IHME-GBD_2021_DATA-6ec784b1-1.csv]: 
Loaded GBD CSV with 6090 rows and 16 columns.
Build/refresh fast lookup table (recommended)? [y/n] (default y): 
Fast table saved to: gbd_fast.parquet
Loaded fast table with 6090 rows.
List available causes? [y/n] (default n): 
Disease string (e.g., "Hepatitis C"): Hepatitis C
Year for computation (e.g., 2021). Leave blank to use all years in file: 2021
Use 'contains' aggregation (sum family of causes matching your string)? [y/n] (default y): 
Matched causes and their incidence metrics for your filters (up to 30):
 idx                              cause_name  incidence_number  incidence_percent  incidence_rate_per_100k        DALYs
   0     Total burden related to hepatitis C      1.164135e+07           0.000316               147.520274 1.518804e+07
   1                       Acute hepatitis C      7.009910e+06           0.000190                88.830265 2.660914e+05
   2 Ch

Unnamed: 0,Disease,Year,Location,Sex,Age,Metric,Incidence_mode,Incidence_chosen_cause,Incidence_cause_names,Cause_names_all,Contains_family_size,Incidence_total,DALYs_total,YLLs_total,YLDs_total,DALY_per_case,YLL_per_case,YLD_per_case,B_score
0,contains: Hepatitis C,2021,Global,Both,All Ages,Number,list_numbers,Total burden related to hepatitis C,Total burden related to hepatitis C,Acute hepatitis C; Chronic hepatitis C includi...,4,11641350.0,15188040.0,15020260.0,167777.133537,1.304664,1.290251,0.014412,0.724361


Saved: B_severity_from_GBD.csv


In [None]:

# ================================================
# Cell 3b — Non‑interactive runner (no input prompts)
# ================================================

# --- USER SETTINGS ---
CSV_PATH = "/content/drive/MyDrive/Colab Notebooks/SIS/IHME-GBD_2021_DATA-6ec784b1-1.csv"   # <- change to your CSV path if needed
# Cache file for the fast lookup table. Defined here as well so that this cell
# does not depend on the interactive runner having been executed first.
FAST_PATH = "gbd_fast.parquet"
# Must be an EXACT cause name: compute_per_case_burden_fast() rejects substrings.
# "Hepatitis C" alone matches four causes and raises; pick one of them, e.g.
# "Acute hepatitis C" or "Total burden related to hepatitis C".
DISEASE  = "Total burden related to hepatitis C"
YEAR     = 2021                                              # or None to use all years available
LIST_CAUSES = False                                          # set True to preview cause list
USE_EXACT_MATCH = True                                       # exact cause name match for speed/precision

# Load & normalise
_df2 = load_gbd_results_csv(CSV_PATH)
print(f"Loaded GBD CSV with {_df2.shape[0]} rows and {_df2.shape[1]} columns.")

# Build fast lookup if missing (one-time); an existing Parquet or CSV cache is reused as-is
if not (os.path.exists(FAST_PATH) or os.path.exists(FAST_PATH.replace('.parquet','.csv'))):
    fast_file = build_fast_lookup(_df2, out_path=FAST_PATH)
    print(f"Fast table saved to: {fast_file}")

# Load fast table; agg2 = None signals the raw-table fallback below
try:
    agg2 = load_fast_lookup(FAST_PATH)
    print(f"Loaded fast table with {agg2.shape[0]} rows.")
except Exception:
    agg2 = None
    print("Fast table not found — falling back to raw CSV path (slower).")

# Optional preview of causes
if LIST_CAUSES:
    preview2 = list_causes(_df2 if agg2 is None else None, agg=agg2, year=YEAR)
    try:
        from IPython.display import display
        display(preview2)
    except Exception:
        print(preview2.to_string(index=False))

# Compute per-case DALYs and B
if agg2 is not None:
    out2 = compute_per_case_burden_fast(agg2, disease=DISEASE, year=YEAR, exact=USE_EXACT_MATCH)
else:
    out2 = compute_per_case_burden(
        _df2,
        disease=DISEASE,
        year=YEAR,
        location="Global",
        sex="Both",
        age="All Ages",
        metric="Number",
    )

# Show & save
try:
    from IPython.display import display
    display(out2)
except Exception:
    print(out2.to_string(index=False))

out2.to_csv("B_severity_from_GBD.csv", index=False)
print("Saved: B_severity_from_GBD.csv")



Loaded GBD CSV with 6090 rows and 16 columns.
Loaded fast table with 6090 rows.
Your query matched multiple causes via substring:
  - Acute hepatitis C
  - Chronic hepatitis C including cirrhosis
  - Liver cancer due to hepatitis C
  - Total burden related to hepatitis C


ValueError: Multiple matches. Choose an exact cause name from above or use 'contains'.

In [None]:
# ================================================
# Cell 4 — Expected Health Gain per Beneficiary (G)
# (auto-uses incidence settings saved by Cell 3)
# ================================================
from typing import Tuple, Literal, Optional, Dict
import math
import numpy as np
import pandas as pd

# ---------- Utilities ----------
def clamp01(x: float) -> float:
    """
    Clamp a numeric value to the closed interval [0, 1].

    Parameters
    ----------
    x : anything accepted by float().

    Returns
    -------
    float in [0, 1].

    Raises
    ------
    ValueError
        If x is NaN. A plain min/max chain would silently turn NaN into 1.0
        (because `min(1.0, nan)` returns 1.0), i.e. an invalid input would be
        treated as the maximum possible probability/efficacy.
    """
    value = float(x)
    if math.isnan(value):
        raise ValueError("clamp01() received NaN; expected a real number")
    return max(0.0, min(1.0, value))

def parse_ie(ie: Tuple[float, float]) -> Tuple[float, float, float]:
    """
    Turn an efficacy interval into an ordered (low, midpoint, high) triple.

    Both bounds are clamped to [0, 1] with clamp01; if they arrive reversed
    they are swapped. The midpoint is the arithmetic mean of the two bounds.

    Raises
    ------
    ValueError
        If `ie` is not a tuple/list with exactly two elements.
    """
    well_formed = isinstance(ie, (tuple, list)) and len(ie) == 2
    if not well_formed:
        raise ValueError("IE must be a 2-element tuple/list: [minIE, maxIE]")

    first = clamp01(ie[0])
    second = clamp01(ie[1])
    # Keep the given order unless the second bound is strictly smaller.
    low, high = (second, first) if second < first else (first, second)
    midpoint = (low + high) / 2.0
    return low, midpoint, high

def risk_from_probability(p: float) -> float:
    """Interpret `p` as a direct probability and clamp it into [0, 1] via clamp01."""
    probability = clamp01(p)
    return probability

def risk_from_incidence_rate(incidence_per_person_year: float, horizon_years: float) -> float:
    """
    Cumulative risk over a horizon for a constant incidence rate.

    Computes 1 - exp(-rate * horizon). Negative rates or horizons are treated
    as 0.

    Parameters
    ----------
    incidence_per_person_year : events per person-year.
    horizon_years : length of the risk window in years.

    Returns
    -------
    float in [0, 1].
    """
    lam = max(0.0, float(incidence_per_person_year))
    H = max(0.0, float(horizon_years))
    # No hazard or no time at risk means zero risk; this also avoids inf * 0 -> NaN.
    if lam == 0.0 or H == 0.0:
        return 0.0
    # -expm1(-x) equals 1 - exp(-x) but avoids catastrophic cancellation when
    # rate * horizon is tiny (e.g. rare diseases expressed per 100k).
    risk = -math.expm1(-lam * H)
    return max(0.0, min(1.0, risk))

def risk_from_incidence_per_100k(incidence_per_100k_per_year: float, horizon_years: float) -> float:
    """
    Cumulative risk over `horizon_years` for an incidence given per 100,000 per year.

    The rate is floored at 0, converted to a per-person-year rate, and then
    handed to risk_from_incidence_rate.
    """
    per_100k = max(0.0, float(incidence_per_100k_per_year))
    per_person_year = per_100k / 100000.0
    return risk_from_incidence_rate(per_person_year, horizon_years)

# ---------- Pull incidence from fast table ----------
def fetch_incidence_from_fast(
    agg: pd.DataFrame,
    query: str,
    year: Optional[int] = None,
    contains: bool = True,
    incidence_mode: str = "acute_only",   # 'sum'|'acute_only'|'largest'|'named'
    acute_regex: str = r"acute hepatitis c",
    incidence_name: Optional[str] = None,
) -> Dict[str, Optional[float]]:
    """
    Look up incidence figures for a cause (or a family of causes) in the fast table.

    Parameters
    ----------
    agg : DataFrame with at least the columns `cause_name`, `measure_key`,
        `metric_key` and `val` (plus `year` when a year filter is wanted).
    query : cause name to search for. With `contains=True` it is passed to
        `Series.str.contains` (case-insensitive, pandas' default pattern
        handling); with `contains=False` it must equal the cause name
        case-insensitively.
    year : restrict to this year. If None and the matched rows span several
        years, only the most recent year is used — summing per-year rates,
        percents or counts across years would not be a per-year figure.
    contains : substring-family search (True) or exact-name search (False).
    incidence_mode : only used when `contains=True`:
        - 'sum'        : total incidence NUMBER over all matched causes
                         (rate and percent are left as None);
        - 'named'      : the cause equal to `incidence_name` (case-insensitive);
        - 'acute_only' : the matched cause fitting `acute_regex` with the
                         largest incidence number, else falls back to 'largest';
        - 'largest'    : the matched cause with the largest incidence number.
        Any other value behaves like 'largest'.
    acute_regex : pattern identifying acute causes for 'acute_only'.
    incidence_name : exact cause name for 'named'.

    Returns
    -------
    dict with keys 'number', 'rate_per_100k', 'percent'. Each value is the sum
    of `val` over rows with measure_key == 'incidence' and metric_key ==
    'number' / 'rate' / 'percent' respectively, or None when that sum is
    zero/absent.
    """
    result: Dict[str, Optional[float]] = {"number": None, "rate_per_100k": None, "percent": None}

    cause_names = agg["cause_name"]
    if contains:
        mask = cause_names.str.contains(query, case=False, na=False)
    else:
        mask = cause_names.str.lower().eq(query.lower())
    if year is not None:
        mask &= agg["year"].eq(year)
    fam = agg.loc[mask].copy()
    if fam.empty:
        return result

    # Without an explicit year, never aggregate across years: keep the latest one.
    if year is None and "year" in fam.columns:
        years_present = fam["year"].dropna()
        if years_present.nunique() > 1:
            fam = fam.loc[fam["year"].eq(years_present.max())]

    def incidence_total(frame: pd.DataFrame, metric: str) -> Optional[float]:
        """Sum `val` over incidence rows of one metric; None when zero or missing."""
        rows = frame["measure_key"].eq("incidence") & frame["metric_key"].eq(metric)
        total = frame.loc[rows, "val"].sum()
        return float(total) if pd.notnull(total) and total != 0 else None

    def largest_cause(frame: pd.DataFrame) -> Optional[str]:
        """Name of the cause with the largest summed incidence number, if any."""
        rows = frame["metric_key"].eq("number") & frame["measure_key"].eq("incidence")
        totals = frame.loc[rows].groupby("cause_name")["val"].sum()
        if totals.empty:
            return None
        return str(totals.sort_values(ascending=False).index[0])

    def pick_single_cause(frame: pd.DataFrame) -> Optional[str]:
        """Resolve the family to one cause according to `incidence_mode`."""
        if incidence_mode == "named" and incidence_name:
            wanted = incidence_name.lower()
            for nm in frame["cause_name"].unique().tolist():
                if str(nm).lower() == wanted:
                    return nm
            return None
        if incidence_mode == "acute_only":
            acute = frame[frame["cause_name"].str.contains(acute_regex, case=False, na=False)]
            if not acute.empty:
                best_acute = largest_cause(acute)
                if best_acute is not None:
                    return best_acute
        # 'largest', unknown modes, and the fallback when no acute cause has incidence numbers
        return largest_cause(frame)

    if contains and incidence_mode == "sum":
        result["number"] = incidence_total(fam, "number")
        return result

    chosen = pick_single_cause(fam) if contains else query
    if chosen is None:
        return result

    sub = fam[fam["cause_name"].str.lower().eq(str(chosen).lower())]
    result["number"] = incidence_total(sub, "number")
    result["rate_per_100k"] = incidence_total(sub, "rate")
    result["percent"] = incidence_total(sub, "percent")
    return result

# ---------- Gain calculators ----------
def gain_infection_prevention(
    daly_case: float,
    risk_window: float,
    IE: Tuple[float, float],
    adherence: float = 1.0,
) -> Dict[str, float]:
    """
    Expected gain per beneficiary for an infection-preventing intervention.

    gain = risk_window * adherence * efficacy * daly_case, evaluated at the
    low, midpoint and high efficacy returned by parse_ie. `risk_window` and
    `adherence` are clamped to [0, 1]; `daly_case` is floored at 0.

    Returns a dict with keys 'gain_min', 'gain_base', 'gain_max'.
    """
    efficacy_bounds = parse_ie(IE)
    uptake = clamp01(adherence)
    risk = clamp01(risk_window)
    burden = max(0.0, float(daly_case))

    # Shared factor of all three estimates.
    exposure = risk * uptake
    labels = ("gain_min", "gain_base", "gain_max")
    return {label: exposure * eff * burden for label, eff in zip(labels, efficacy_bounds)}

def gain_severity_reduction(
    daly_case: float,
    risk_window: float,
    IE: Tuple[float, float],
    adherence: float = 1.0,
) -> Dict[str, float]:
    """
    Expected gain per beneficiary for a severity-reducing intervention.

    Uses exactly the same formula as gain_infection_prevention and simply
    delegates to it.
    """
    return gain_infection_prevention(
        daly_case=daly_case,
        risk_window=risk_window,
        IE=IE,
        adherence=adherence,
    )

def gain_combined(
    daly_case: float,
    risk_window: float,
    IE_infection: Tuple[float, float],
    IE_severity: Tuple[float, float],
    adherence: float = 1.0,
) -> Dict[str, float]:
    """
    Expected gain per beneficiary when an intervention both prevents infection
    and reduces severity.

    The two efficacies are combined as xi + (1 - xi) * xs (clamped to [0, 1]),
    pairing low with low, midpoint with midpoint and high with high. The
    result is scaled by risk_window * adherence * daly_case, with the same
    clamping/flooring as the single-efficacy calculators.

    Returns a dict with keys 'gain_min', 'gain_base', 'gain_max'.
    """
    infection_bounds = parse_ie(IE_infection)
    severity_bounds = parse_ie(IE_severity)
    uptake = clamp01(adherence)
    risk = clamp01(risk_window)
    burden = max(0.0, float(daly_case))

    exposure = risk * uptake
    gains_by_label: Dict[str, float] = {}
    labels = ("gain_min", "gain_base", "gain_max")
    for label, inf_eff, sev_eff in zip(labels, infection_bounds, severity_bounds):
        combined_eff = clamp01(inf_eff + (1.0 - inf_eff) * sev_eff)
        gains_by_label[label] = exposure * combined_eff * burden
    return gains_by_label

def gain_therapeutic(daly_standard_case: float, IE: Tuple[float, float]) -> Dict[str, float]:
    """
    Expected gain per treated case for a therapeutic.

    gain = efficacy * daly_standard_case at the low, midpoint and high
    efficacy from parse_ie; the DALY value is floored at 0. No risk window or
    adherence factor is applied here.

    Returns a dict with keys 'gain_min', 'gain_base', 'gain_max'.
    """
    efficacy_bounds = parse_ie(IE)
    burden = max(0.0, float(daly_standard_case))
    labels = ("gain_min", "gain_base", "gain_max")
    return {label: eff * burden for label, eff in zip(labels, efficacy_bounds)}

# ---------- Map Gain -> G ----------
def normalise_gain_interval(gain_dict: Dict[str, float], gmin: float = 0.001, gmax: float = 20.0) -> Dict[str, float]:
    """
    Map the gain estimates onto the [0, 1] G scale (log10 between gmin and gmax).

    Parameters
    ----------
    gain_dict : dict that may hold 'gain_min', 'gain_base', 'gain_max';
        missing keys are treated as 0.0 and negative values are floored at 0.
    gmin, gmax : lower and upper anchors passed to log_normalise.

    Returns
    -------
    dict with keys 'G_min', 'G_base', 'G_max', each in [0, 1].
    """
    key_pairs = (("gain_min", "G_min"), ("gain_base", "G_base"), ("gain_max", "G_max"))
    out: Dict[str, float] = {}
    for k_in, k_out in key_pairs:
        val = max(0.0, float(gain_dict.get(k_in, 0.0)))
        # log_normalise only clips at the lower anchor, so a gain above gmax
        # would score above 1; cap it so G stays on the [0, 1] scale.
        out[k_out] = min(1.0, log_normalise(val, xmin=gmin, xmax=gmax))
    return out

# ---------- Load DALY_case ----------
# Prefer the per-case DALY written to B_severity_from_GBD.csv (last row); fall back
# to manual entry when the file, the column, or a usable value is not available.
use_csv = yesno("Load DALY_case from B_severity_from_GBD.csv if present?", default="y")
DALY_case = None
if use_csv:
    try:
        bdf = pd.read_csv("B_severity_from_GBD.csv")
        if "DALY_per_case" in bdf.columns:
            _daly_candidate = float(bdf["DALY_per_case"].iloc[-1])
            # A NaN or negative value would be silently floored to 0 by the gain
            # calculators, so reject it here and ask the user instead.
            if math.isfinite(_daly_candidate) and _daly_candidate >= 0.0:
                DALY_case = _daly_candidate
                print(f"Loaded DALY_case = {DALY_case:.6f} from B_severity_from_GBD.csv")
            else:
                print(f"Ignoring DALY_per_case = {_daly_candidate} from B_severity_from_GBD.csv (must be a finite value >= 0).")
        else:
            print("B_severity_from_GBD.csv has no DALY_per_case column.")
    except Exception as e:
        print(f"Could not load B_severity_from_GBD.csv: {e}")
if DALY_case is None:
    DALY_case = ask_float("Enter DALY_case (per-case severity)", default=0.5, minv=0.0)

# ---------- Risk window (auto-use Cell 3 settings & CSV RATE/PERCENT) ----------
# Resolution order:
#   1) settings stored in the last row of B_severity_from_GBD.csv,
#   2) an interactive lookup in the fast table,
#   3) fully manual entry.
# Each step only runs while risk_window is still None.

def _csv_text(value, default=None):
    """Stripped text of a CSV cell, or `default` when the cell is missing, NaN or blank."""
    if value is None or pd.isnull(value):
        return default
    text = str(value).strip()
    return text if text else default


def _risk_window_from_record(rec, horizon):
    """
    Turn a fetch_incidence_from_fast() record into a RiskWindow probability.

    Preference order: rate per 100k, then percent, then absolute number (which
    needs a population from the user). Returns None when the record holds none
    of them.
    """
    if rec.get("rate_per_100k") is not None:
        print(f"Using incidence RATE from CSV: {rec['rate_per_100k']} per 100k person-years")
        return risk_from_incidence_per_100k(rec["rate_per_100k"], horizon)
    if rec.get("percent") is not None:
        p = rec["percent"] / 100.0
        print(f"Using incidence PERCENT from CSV: {rec['percent']}% per year")
        return clamp01(1.0 - (1.0 - clamp01(p)) ** horizon)  # discrete-year approx
    if rec.get("number") is not None:
        # If only absolute number is present (rare), ask for population to derive rate
        pop = ask_float("Incidence NUMBER found; enter population at risk to convert to rate (per year)", minv=1.0, default=1_000_000)
        rate_per_100k = (rec["number"] / pop) * 100000.0
        print(f"Derived RATE ≈ {rate_per_100k:.6f} per 100k person-years")
        return risk_from_incidence_per_100k(rate_per_100k, horizon)
    return None


use_saved_cfg = yesno("Use incidence settings saved from Cell 3 (in B_severity_from_GBD.csv)?", default="y")
risk_window = None
year_for_inc = None  # remembered so the interactive lookup can default to the same year
if use_saved_cfg:
    try:
        bdf = pd.read_csv("B_severity_from_GBD.csv")
        last = bdf.iloc[-1]
        # Infer query/contains from the Disease field; NaN cells count as blank
        # instead of becoming the literal text "nan".
        disease_field = _csv_text(last.get("Disease"), "")
        if disease_field.lower().startswith("contains:"):
            q = disease_field.split(":", 1)[1].strip()
            contains = True
        else:
            q = disease_field
            contains = False
        mode = _csv_text(last.get("Incidence_mode"), "acute_only").lower()
        inc_name = _csv_text(last.get("Incidence_chosen_cause"))
        year_for_inc = int(last.get("Year")) if pd.notnull(last.get("Year")) else None

        agg_fast = load_fast_lookup("gbd_fast.parquet")
        horizon = ask_float("Horizon (years) for risk window", default=1.0, minv=0.0)

        rec = fetch_incidence_from_fast(
            agg_fast,
            query=q or "hepatitis c",
            year=year_for_inc,
            contains=contains,
            incidence_mode=mode or "acute_only",
            acute_regex=r"acute hepatitis c",
            incidence_name=inc_name,
        )
        risk_window = _risk_window_from_record(rec, horizon)
        if risk_window is None:
            print("Could not find RATE or PERCENT in fast table; switching to manual risk entry.")
    except Exception as e:
        print(f"Using saved settings failed: {e}")

if risk_window is None:
    use_rate_csv = yesno("Use incidence RATE from fast table to set RiskWindow?", default="y")
    if use_rate_csv:
        try:
            agg_fast = load_fast_lookup("gbd_fast.parquet")
            q = input("Substring or exact cause name for incidence (e.g., 'hepatitis c' or 'Acute hepatitis C') [default hepatitis c]: ").strip() or "hepatitis c"
            yr_txt = input("Year for incidence (blank = same as earlier): ").strip()
            if yr_txt:
                yr_inc = int(yr_txt)
            elif year_for_inc is not None:
                # Blank really means "same as earlier": reuse the year from the saved settings...
                yr_inc = year_for_inc
            else:
                # ...or the YEAR configured for Cell 3, when that name exists in this session.
                yr_inc = globals().get("YEAR")
            contains = yesno("Search by substring family (contains)?", default="y")
            mode = (input("Incidence selection mode for family [sum/acute_only/largest/named] [default acute_only]: ").strip().lower()
                    or "acute_only")
            inc_name = input("Exact cause name (only if mode='named') [blank if not used]: ").strip() or None
            horizon = ask_float("Horizon (years) for risk window", default=60.0, minv=0.0)

            rec = fetch_incidence_from_fast(
                agg_fast, query=q, year=yr_inc, contains=contains,
                incidence_mode=mode, acute_regex=r"acute hepatitis c", incidence_name=inc_name,
            )
            # Same conversion as the saved-settings path, including the NUMBER -> rate fallback.
            risk_window = _risk_window_from_record(rec, horizon)
            if risk_window is None:
                print("Could not find RATE or PERCENT in fast table; please enter risk manually.")
        except Exception as e:
            print(f"Fast table not available or lookup failed: {e}")
            risk_window = None

if risk_window is None:
    print("\nRisk window options: 1) direct probability, 2) incidence per person-year, 3) incidence per 100k/year")
    mode_i = ask_int("Choose risk mode [1/2/3]", default=1, minv=1, maxv=3)
    if mode_i == 1:
        risk_window = risk_from_probability(ask_float("RiskWindow (probability 0-1)", default=0.1, minv=0.0, maxv=1.0))
    elif mode_i == 2:
        lam = ask_float("Incidence rate per person-year (e.g., 0.02)", default=0.02, minv=0.0)
        H = ask_float("Horizon (years)", default=1.0, minv=0.0)
        risk_window = risk_from_incidence_rate(lam, H)
    else:
        inc100k = ask_float("Incidence per 100,000 person-years", default=200.0, minv=0.0)
        H = ask_float("Horizon (years)", default=1.0, minv=0.0)
        risk_window = risk_from_incidence_per_100k(inc100k, H)

print(f"Computed RiskWindow = {risk_window:.6f}")

# ---------- Adherence ----------
# Share of beneficiaries who actually take up / adhere to the intervention.
adh = ask_float("Adherence/uptake α (0-1)", default=1.0, minv=0.0, maxv=1.0)

# ---------- Archetype & IE ----------
# The archetype decides which efficacy bounds are asked for and which gain
# calculator turns them into gain_min / gain_base / gain_max.
print("\nArchetypes: 1) infection prevention, 2) severity reduction, 3) combined, 4) therapeutic")
arch = ask_int("Choose archetype [1/2/3/4]", default=1, minv=1, maxv=4)

if arch == 3:
    # Combined: separate efficacy intervals for infection and for severity.
    iei_lo = ask_float("IE_infection_min (Intervention Efficacy) (0-1)", default=0.5, minv=0.0, maxv=1.0)
    iei_hi = ask_float("IE_infection_max (0-1)", default=0.5, minv=0.0, maxv=1.0)
    ies_lo = ask_float("IE_severity_min (0-1)", default=0.3, minv=0.0, maxv=1.0)
    ies_hi = ask_float("IE_severity_max (0-1)", default=0.3, minv=0.0, maxv=1.0)
    gains = gain_combined(DALY_case, risk_window, (iei_lo, iei_hi), (ies_lo, ies_hi), adh)
elif arch in (1, 2):
    # Single efficacy interval; only the calculator differs between 1 and 2.
    ie_lo = ask_float("IE_min (Intervention Efficacy) (0-1)", default=0.5, minv=0.0, maxv=1.0)
    ie_hi = ask_float("IE_max (0-1)", default=0.5, minv=0.0, maxv=1.0)
    if arch == 1:
        gains = gain_infection_prevention(DALY_case, risk_window, (ie_lo, ie_hi), adh)
    else:
        gains = gain_severity_reduction(DALY_case, risk_window, (ie_lo, ie_hi), adh)
else:
    # Therapeutic: proportional DALY reduction; risk window and adherence are not passed.
    ie_lo = ask_float("IE_min (proportional DALY reduction 0-1)", default=0.3, minv=0.0, maxv=1.0)
    ie_hi = ask_float("IE_max (proportional DALY reduction 0-1)", default=0.3, minv=0.0, maxv=1.0)
    gains = gain_therapeutic(DALY_case, (ie_lo, ie_hi))

# ---------- Normalise to G & Save ----------
# Convert the three gain estimates to G scores on the 0.001-20 log scale.
Gs = normalise_gain_interval(gains, gmin=0.001, gmax=20.0)

# One summary row: inputs first, then the raw gains, then their G scores.
rows = {"DALY_case": DALY_case, "RiskWindow": risk_window, "Adherence": adh}
rows.update(gains)
rows.update(Gs)
df_G = pd.DataFrame([rows])

# Rich display when IPython is available, plain text otherwise.
try:
    from IPython.display import display
    display(df_G)
except Exception:
    print(df_G.to_string(index=False))

# Written to disk so the result is available outside this cell.
df_G.to_csv("G_expected_gain.csv", index=False)
print("Saved: G_expected_gain.csv")


Load DALY_case from B_severity_from_GBD.csv if present? [y/n] (default y): 
Loaded DALY_case = 1.304664 from B_severity_from_GBD.csv
Use incidence settings saved from Cell 3 (in B_severity_from_GBD.csv)? [y/n] (default y): 
Horizon (years) for risk window [default 1.0]: 30
Using incidence RATE from CSV: 147.52027393139 per 100k person-years
Computed RiskWindow = 0.043291
Adherence/uptake α (0-1) [default 1.0]: 0.7

Archetypes: 1) infection prevention, 2) severity reduction, 3) combined, 4) therapeutic
Choose archetype [1/2/3/4] [default 1]: 1
IE_min (Intervention Efficacy) (0-1) [default 0.5]: 0.4
IE_max (0-1) [default 0.5]: 0.9


Unnamed: 0,DALY_case,RiskWindow,Adherence,gain_min,gain_base,gain_max,G_min,G_base,G_max
0,1.304664,0.043291,0.7,0.015814,0.025699,0.035583,0.278783,0.327807,0.360666


Saved: G_expected_gain.csv
