# v0 Static Visuals (Standalone)

This notebook recreates the v0 static charts without calling `src/v0_visuals.py`. It reads from `data_raw/` and optionally saves PNG/HTML outputs into `v0/assets/`.

If you are running in Colab, set `PROJECT_ROOT` to the repo folder on your drive.


## 0) Setup

- Update `PROJECT_ROOT` if the auto-detection does not find `data_raw/`.
- `SAVE_OUTPUTS` defaults to `True`, which exports PNGs into `v0/assets/` and HTML into `v0/assets/html/`; set it to `False` to skip exporting.


In [None]:
from pathlib import Path

import numpy as np
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go

# pycountry provides the country-name lookups used by the map cells; stop
# right away with install instructions when it is not available.
try:
    import pycountry
except ImportError as exc:
    raise SystemExit("pycountry is required. Install: pip install pycountry") from exc

# Repo root = the working directory, or its nearest ancestor, that contains
# data_raw/. When no candidate has it, stay on the working directory.
_cwd = Path.cwd()
PROJECT_ROOT = next(
    (folder for folder in (_cwd, *_cwd.parents) if (folder / "data_raw").exists()),
    _cwd,
)

DATA_RAW = PROJECT_ROOT / "data_raw"
ASSETS_DIR = PROJECT_ROOT / "v0" / "assets"
HTML_DIR = ASSETS_DIR / "html"

# Toggle for writing PNG/HTML exports (see save_chart).
SAVE_OUTPUTS = True

if SAVE_OUTPUTS:
    for out_dir in (ASSETS_DIR, HTML_DIR):
        out_dir.mkdir(parents=True, exist_ok=True)

print("Project root:", PROJECT_ROOT)
print("Data dir:", DATA_RAW)


## 1) Helper functions

In [None]:
# Hand-maintained mapping from country names to three-letter codes.
# country_to_iso3 consults this table first and only falls back to pycountry
# for names that are not listed here.
OVERRIDES = {
    "Bolivia (Plurinational State of)": "BOL",
    "Venezuela (Bolivarian Republic of)": "VEN",
    "Iran (Islamic Republic of)": "IRN",
    "Syrian Arab Republic": "SYR",
    "United Republic of Tanzania": "TZA",
    "Côte d'Ivoire": "CIV",
    "Lao People's Democratic Republic": "LAO",
    "Democratic People's Republic of Korea": "PRK",
    "Republic of Korea": "KOR",
    "Republic of Moldova": "MDA",
    "Viet Nam": "VNM",
    "Cabo Verde": "CPV",
    "Türkiye": "TUR",
    "Czechia": "CZE",
    "North Macedonia": "MKD",
    "Eswatini": "SWZ",
    "United States of America": "USA",
    "Russian Federation": "RUS",
    "Brunei Darussalam": "BRN",
}

def country_to_iso3(name: str) -> str | None:
    if not name or not isinstance(name, str):
        return None
    if name in OVERRIDES:
        return OVERRIDES[name]
    try:
        return pycountry.countries.lookup(name).alpha_3
    except LookupError:
        try:
            matches = pycountry.countries.search_fuzzy(name)
            if len(matches) == 1:
                return matches[0].alpha_3
        except LookupError:
            return None
    return None

def pick_both_sex(df: pd.DataFrame, col: str) -> str:
    """Choose the label in ``df[col]`` that denotes the combined-sex rows.

    Prefers ``"Both sexes"``, then ``"Both"``. Otherwise returns the first
    non-null value found in the column, or ``""`` when the column has no
    non-null values.
    """
    labels = df[col].dropna().unique().tolist()
    for preferred in ("Both sexes", "Both"):
        if preferred in labels:
            return preferred
    if labels:
        return labels[0]
    return ""

def read_filtered_csv(
    path: Path,
    usecols: list[str],
    filters: dict[str, object],
    chunksize: int = 250000,
) -> pd.DataFrame:
    """Read a CSV in chunks, keeping only the rows that pass every filter.

    All columns are read as strings, so filter values are compared by their
    ``str()`` form.

    Args:
        path: CSV file to read.
        usecols: Columns to load; also the columns of the empty result.
        filters: Maps a column name to the accepted value. A list, set or
            tuple accepts any of its members, ``None`` disables the filter
            for that column, and any other value must match exactly.
        chunksize: Number of rows per chunk handed to ``pd.read_csv``.

    Returns:
        The matching rows with a fresh index, or an empty frame with
        ``usecols`` as columns when nothing matched.
    """
    # Stringify the filter values once up front rather than for every chunk.
    active: dict[str, object] = {}
    for column, allowed in filters.items():
        if allowed is None:
            continue
        if isinstance(allowed, (list, set, tuple)):
            active[column] = [str(item) for item in allowed]
        else:
            active[column] = str(allowed)

    kept = []
    reader = pd.read_csv(path, usecols=usecols, dtype=str, chunksize=chunksize)
    for part in reader:
        for column, wanted in active.items():
            if isinstance(wanted, list):
                part = part[part[column].isin(wanted)]
            else:
                part = part[part[column] == wanted]
        if not part.empty:
            kept.append(part)

    if kept:
        return pd.concat(kept, ignore_index=True)
    return pd.DataFrame(columns=usecols)

def save_chart(fig: go.Figure, name: str) -> None:
    """Export ``fig`` as ``v0_<name>.png`` and ``v0_<name>.html``.

    Does nothing unless ``SAVE_OUTPUTS`` is truthy. The PNG goes to
    ``ASSETS_DIR`` and the HTML to ``HTML_DIR``. A failing PNG export is
    reported with ``print`` and does not prevent the HTML export.
    """
    if SAVE_OUTPUTS:
        stem = f"v0_{name}"
        png_target = ASSETS_DIR / f"{stem}.png"
        html_target = HTML_DIR / f"{stem}.html"
        try:
            fig.write_image(png_target, scale=2)
        except Exception as exc:
            # Best effort: report the failure and still write the HTML below.
            print(f"PNG export failed for {name}: {exc}")
        fig.write_html(html_target)

## 2) WHO global (2021)

In [None]:
# WHO 2021 master table: keep the combined-sex rows, coerce the numeric
# columns, attach ISO-3 codes and draw a world choropleth of the
# age-standardized rate. `df` and `who_path` are reused by the scatter cell.
who_path = DATA_RAW / "who_global_master.csv"
if who_path.exists():
    df = pd.read_csv(who_path)
    both_label = pick_both_sex(df, "sex")
    df = df[df["sex"] == both_label].copy()

    # errors="coerce" turns unparseable cells into NaN instead of raising.
    for numeric_col in (
        "age_standardized_suicide_rate_2021",
        "crude_suicide_rate_2021",
        "number_suicides_2021",
    ):
        df[numeric_col] = pd.to_numeric(df[numeric_col], errors="coerce")

    df["iso3"] = df["country"].apply(country_to_iso3)
    # Rows without a resolvable ISO-3 code cannot be placed on the map.
    map_df = df[df["iso3"].notna()].copy()

    fig = px.choropleth(
        map_df,
        locations="iso3",
        color="age_standardized_suicide_rate_2021",
        hover_name="country",
        color_continuous_scale="Reds",
        title="WHO 2021 age-standardized suicide rate (Both sexes)",
    )
    save_chart(fig, "who_global_map_age_std")
    # A bare `fig` nested inside `if` is not echoed by the notebook (only a
    # top-level trailing expression is), so render the figure explicitly.
    fig.show()
else:
    print("Missing:", who_path)


In [None]:
# Crude vs age-standardized rate per country, sized by the number of
# suicides, with a least-squares trend line. Relies on `df` and `who_path`
# from the WHO map cell above.
if who_path.exists():
    x_col = "crude_suicide_rate_2021"
    y_col = "age_standardized_suicide_rate_2021"
    size_col = "number_suicides_2021"

    # The size column is part of the subset because plotly rejects NaN
    # marker sizes; without it a single missing count breaks the whole chart.
    scatter_df = df.dropna(subset=[x_col, y_col, size_col])

    fig = px.scatter(
        scatter_df,
        x=x_col,
        y=y_col,
        color="income_group",
        size=size_col,
        hover_name="country",
        title="Crude vs age-standardized suicide rates (Both sexes)",
    )
    if len(scatter_df) > 2:
        # Degree-1 fit: coefficients come back as (slope, intercept).
        slope, intercept = np.polyfit(scatter_df[x_col], scatter_df[y_col], 1)
        line_x = np.linspace(scatter_df[x_col].min(), scatter_df[x_col].max(), 100)
        line_y = slope * line_x + intercept
        fig.add_trace(go.Scatter(x=line_x, y=line_y, mode="lines", name="Trend"))
    save_chart(fig, "who_scatter_crude_vs_age_std")
    # A bare `fig` nested inside `if` is not echoed by the notebook; render it.
    fig.show()


In [None]:
# Per-region WHO files -> one combined frame -> violin plot of the
# age-standardized rate by region, split by sex. Missing files are skipped.
region_files = {
    "who_africa_region_full.csv": "Africa",
    "who_americas_region_full.csv": "Americas",
    "who_emro_region_full.csv": "EMRO",
    "who_europe_region_full.csv": "Europe",
    "who_searo_region_full.csv": "SEARO",
    "who_wpro_region_full.csv": "WPRO",
}
frames = []
for filename, region in region_files.items():
    path = DATA_RAW / filename
    if not path.exists():
        continue
    reg_df = pd.read_csv(path)
    # Tag every row with the region label taken from the mapping above.
    reg_df["region"] = region
    reg_df["age_standardized_suicide_rate_2021"] = pd.to_numeric(
        reg_df["age_standardized_suicide_rate_2021"], errors="coerce"
    )
    frames.append(reg_df)

if frames:
    region_df = pd.concat(frames, ignore_index=True)
    fig = px.violin(
        region_df,
        x="region",
        y="age_standardized_suicide_rate_2021",
        color="sex",
        box=True,
        points="all",
        title="WHO regional distribution of age-standardized rates",
    )
    save_chart(fig, "who_region_violin_age_std")
    # A bare `fig` nested inside `if` is not echoed by the notebook; render it.
    fig.show()


## 3) GBD depression DALYs

In [None]:
# Depressive-disorder DALY rates (sex "Both", year 2023): the ten highest
# locations within each age group, drawn as one horizontal-bar facet per
# age group.
path = DATA_RAW / "IHME-GBD_2023_DATA-dalys-causes-1.csv"
usecols = [
    "location_name",
    "sex_name",
    "age_name",
    "cause_name",
    "measure_name",
    "metric_name",
    "year",
    "val",
]
filters = {
    "cause_name": "Depressive disorders",
    "measure_name": "DALYs (Disability-Adjusted Life Years)",
    "metric_name": "Rate",
    "sex_name": "Both",
    "year": "2023",
}
if path.exists():
    df = read_filtered_csv(path, usecols, filters)
    df["val"] = pd.to_numeric(df["val"], errors="coerce")
    df = df.dropna(subset=["val"])
    if not df.empty:
        # Sort once, then take the first ten rows of every age group.
        top = (
            df.sort_values("val", ascending=False)
            .groupby("age_name", as_index=False)
            .head(10)
        )
        fig = px.bar(
            top,
            x="val",
            y="location_name",
            facet_col="age_name",
            orientation="h",
            title="Depressive disorders DALYs rate (Top 10 by age group)",
        )
        fig.update_layout(margin=dict(l=230, r=40, t=90, b=40), height=620)
        # Strip the "age_name=" prefix from the facet titles.
        fig.for_each_annotation(lambda a: a.update(text=a.text.replace("age_name=", "")))
        fig.update_yaxes(matches=None)
        fig.update_xaxes(showticklabels=True)
        save_chart(fig, "gbd_depression_dalys_top10")
        # A bare `fig` nested inside `if` is not echoed by the notebook;
        # render the figure explicitly.
        fig.show()
else:
    print("Missing:", path)


## 4) GBD mental and substance deaths

In [None]:
# Death rates (2023, age group "25+ years") for self-harm, alcohol use
# disorders and drug use disorders: the ten highest locations per cause and
# sex, one facet per cause.
path = DATA_RAW / "IHME-GBD_2023_DATA-deaths-mental-substance-violence-1.csv"
usecols = [
    "location_name",
    "sex_name",
    "age_name",
    "cause_name",
    "measure_name",
    "metric_name",
    "year",
    "val",
]
filters = {
    "measure_name": "Deaths",
    "metric_name": "Rate",
    "year": "2023",
    "age_name": "25+ years",
    "cause_name": ["Self-harm", "Alcohol use disorders", "Drug use disorders"],
}
if path.exists():
    df = read_filtered_csv(path, usecols, filters)
    df["val"] = pd.to_numeric(df["val"], errors="coerce")
    df = df.dropna(subset=["val"])
    if not df.empty:
        # Sort once, then take the first ten rows of every (cause, sex) group.
        top = (
            df.sort_values("val", ascending=False)
            .groupby(["cause_name", "sex_name"], as_index=False)
            .head(10)
        )
        fig = px.bar(
            top,
            x="val",
            y="location_name",
            color="sex_name",
            facet_col="cause_name",
            orientation="h",
            title="Mental/substance deaths rate (Top 10 by cause, 25+ years)",
        )
        # Drop the "cause_name=" prefix and wrap the long facet titles.
        fig.for_each_annotation(
            lambda a: a.update(
                text=(
                    a.text.replace("cause_name=", "")
                    .replace("Alcohol use disorders", "Alcohol use<br>disorders")
                    .replace("Drug use disorders", "Drug use<br>disorders")
                ),
                font=dict(size=12),
            )
        )
        fig.update_layout(margin=dict(t=100))
        save_chart(fig, "gbd_mental_deaths_small_multiples")
        # A bare `fig` nested inside `if` is not echoed by the notebook;
        # render the figure explicitly.
        fig.show()
else:
    print("Missing:", path)


## 5) GBD age-standardized death rate heatmap

In [None]:
# Global age-standardized death rates (2023): heatmap of cause x sex for the
# twenty causes with the highest rate in the preferred sex category.
path = DATA_RAW / "IHME-GBD_2023_DATA-age-standardized-death-rate-1.csv"
usecols = [
    "location_name",
    "sex_name",
    "age_name",
    "cause_name",
    "measure_name",
    "metric_name",
    "year",
    "val",
]
filters = {
    "location_name": "Global",
    "measure_name": "Deaths",
    "metric_name": "Rate",
    "year": "2023",
    "age_name": "Age-standardized",
}
if path.exists():
    df = read_filtered_csv(path, usecols, filters)
    df["val"] = pd.to_numeric(df["val"], errors="coerce")
    df = df.dropna(subset=["val"])
    if not df.empty:
        # Rank causes on "Both" when present, otherwise on the first sex seen.
        sexes_present = df["sex_name"].unique()
        preferred_sex = "Both" if "Both" in sexes_present else sexes_present[0]
        top_causes = (
            df[df["sex_name"] == preferred_sex]
            .sort_values("val", ascending=False)
            .head(20)["cause_name"]
            .tolist()
        )
        # Rows = causes, columns = sexes, ordered by the preferred-sex rate.
        pivot = (
            df[df["cause_name"].isin(top_causes)]
            .pivot_table(index="cause_name", columns="sex_name", values="val", aggfunc="mean")
            .sort_values(by=preferred_sex, ascending=False)
        )
        fig = px.imshow(
            pivot,
            color_continuous_scale="RdBu",
            title="Global age-standardized death rate by cause and sex (2023)",
        )
        save_chart(fig, "gbd_age_standardized_heatmap")
        # A bare `fig` nested inside `if` is not echoed by the notebook;
        # render the figure explicitly.
        fig.show()
else:
    print("Missing:", path)


## 6) GBD all-cause trends

In [None]:
# Global all-cause deaths over time ("All ages"): one line per sex, with the
# "Number" and "Rate" metrics in separate facets on independent y-axes.
path = DATA_RAW / "IHME-GBD_2023_DATA-all-cause-burden-all-ages-1.csv"
usecols = [
    "location_name",
    "sex_name",
    "age_name",
    "cause_name",
    "measure_name",
    "metric_name",
    "year",
    "val",
]
filters = {
    "location_name": "Global",
    "age_name": "All ages",
    "cause_name": "All causes",
    "measure_name": "Deaths",
    "metric_name": ["Number", "Rate"],
}
if path.exists():
    df = read_filtered_csv(path, usecols, filters)
    df["val"] = pd.to_numeric(df["val"], errors="coerce")
    df["year"] = pd.to_numeric(df["year"], errors="coerce")
    df = df.dropna(subset=["val", "year"])
    if not df.empty:
        # Fix the facet order to Number, Rate.
        df["metric_name"] = pd.Categorical(
            df["metric_name"],
            categories=["Number", "Rate"],
            ordered=True,
        )
        # px.line joins points in row order, so sort chronologically first;
        # otherwise an unsorted file produces zig-zagging lines.
        df = df.sort_values("year")
        fig = px.line(
            df,
            x="year",
            y="val",
            color="sex_name",
            facet_col="metric_name",
            markers=True,
            title="Global all-cause deaths trend by metric (All ages)",
        )
        # Counts and rates live on very different scales.
        fig.update_yaxes(matches=None)
        save_chart(fig, "gbd_allcause_animated_trend")
        # A bare `fig` nested inside `if` is not echoed by the notebook;
        # render the figure explicitly.
        fig.show()
else:
    print("Missing:", path)


## 7) GBD probability of death

In [None]:
# Probability of death (2023): pick one (cause, age group) slice that
# actually varies across locations and map it. `df_prob`, `cause_label`,
# `age_label` and `prob_path` are reused by the top-20 cell below.
prob_path = DATA_RAW / "IHME-GBD_2023_DATA-probability-of-death-1.csv"
usecols = [
    "location_name",
    "sex_name",
    "age_name",
    "cause_name",
    "measure_name",
    "metric_name",
    "year",
    "val",
]
filters = {
    "metric_name": "Probability of death",
    "year": "2023",
}
if prob_path.exists():
    df_prob = read_filtered_csv(prob_path, usecols, filters)
    df_prob["val"] = pd.to_numeric(df_prob["val"], errors="coerce")
    df_prob = df_prob.dropna(subset=["val"])
    if not df_prob.empty:
        sex_label = pick_both_sex(df_prob, "sex_name")
        df_prob = df_prob[df_prob["sex_name"] == sex_label].copy()

        # 1) Preferred slices, in priority order: take the first (cause, age)
        #    pair that has data with more than one distinct value.
        candidate_causes = [
            "Self-harm",
            "Injuries",
            "Substance use disorders",
            "Mental disorders",
            "Alcohol use disorders",
        ]
        candidate_ages = ["25+ years", "20-24 years", "<20 years"]
        selection = None
        for cause in candidate_causes:
            for age in candidate_ages:
                sub = df_prob[(df_prob["cause_name"] == cause) & (df_prob["age_name"] == age)]
                if not sub.empty and sub["val"].nunique() > 1:
                    selection = (sub, cause, age)
                    break
            if selection is not None:
                break

        # 2) Otherwise take the (cause, age) group with the largest spread
        #    among groups that have more than five rows.
        if selection is None:
            filtered = df_prob[df_prob["age_name"] != "All ages"]
            if filtered.empty:
                filtered = df_prob
            # Keep the group keys as the index (the groupby default) so that
            # stats.index[0] is a (cause, age) tuple; with as_index=False the
            # keys can come back as columns, which breaks the unpacking below.
            stats = (
                filtered.groupby(["cause_name", "age_name"])["val"]
                .agg(["std", "count"])
                .sort_values("std", ascending=False)
            )
            stats = stats[stats["count"] > 5]
            if not stats.empty:
                cause, age = stats.index[0]
                sub = filtered[(filtered["cause_name"] == cause) & (filtered["age_name"] == age)]
                selection = (sub, str(cause), str(age))

        # 3) Last resort: use everything, labelled with the first row's
        #    cause and age group.
        if selection is None:
            cause = df_prob["cause_name"].iloc[0]
            age = df_prob["age_name"].iloc[0]
            selection = (df_prob, str(cause), str(age))

        df_prob, cause_label, age_label = selection
        # Copy before adding a column: the selected frame may be a filtered
        # slice, and assigning into a slice triggers chained-assignment
        # warnings.
        df_prob = df_prob.copy()
        df_prob["iso3"] = df_prob["location_name"].apply(country_to_iso3)
        map_df = df_prob[df_prob["iso3"].notna()].copy()

        fig = px.choropleth(
            map_df,
            locations="iso3",
            color="val",
            hover_name="location_name",
            color_continuous_scale="Blues",
            title=f"Probability of death ({cause_label}, {age_label}, 2023, {sex_label})",
        )
        save_chart(fig, "gbd_probability_of_death_map")
        # A bare `fig` nested inside `if` is not echoed by the notebook;
        # render the figure explicitly.
        fig.show()
else:
    print("Missing:", prob_path)


In [None]:
# Top-20 bar chart for the slice chosen by the probability-of-death map
# cell; skipped when that cell did not produce a non-empty `df_prob`.
if prob_path.exists() and "df_prob" in locals() and not df_prob.empty:
    top = df_prob.sort_values("val", ascending=False).head(20)
    fig = px.bar(
        top,
        x="val",
        y="location_name",
        orientation="h",
        title=f"Top 20 probability of death ({cause_label}, {age_label}, 2023)",
    )
    save_chart(fig, "gbd_probability_of_death_top20")
    # A bare `fig` nested inside `if` is not echoed by the notebook; render it.
    fig.show()


## 8) GBD risk-factor treemap

In [None]:
# Risk-factor share of deaths ("Percent", 2023, location "High-income"),
# shown as a cause -> risk-factor treemap for a single sex category.
path = DATA_RAW / "IHME-GBD_2023_DATA-risk-factor-burden-1.csv"
usecols = [
    "location_name",
    "sex_name",
    "age_name",
    "cause_name",
    "rei_name",
    "measure_name",
    "metric_name",
    "year",
    "val",
]
filters = {
    "location_name": "High-income",
    "measure_name": "Deaths",
    "metric_name": "Percent",
    "year": "2023",
}
if path.exists():
    df = read_filtered_csv(path, usecols, filters)
    df["val"] = pd.to_numeric(df["val"], errors="coerce")
    df = df.dropna(subset=["val"])
    if not df.empty:
        # Use "Male" when present, otherwise the first sex category seen.
        sexes_present = df["sex_name"].unique()
        sex_pick = "Male" if "Male" in sexes_present else sexes_present[0]
        df = df[df["sex_name"] == sex_pick]
        fig = px.treemap(
            df,
            path=["cause_name", "rei_name"],
            values="val",
            title=f"Risk-factor burden treemap ({sex_pick}, High-income, 2023)",
        )
        save_chart(fig, "gbd_risk_factor_treemap")
        # A bare `fig` nested inside `if` is not echoed by the notebook;
        # render the figure explicitly.
        fig.show()
else:
    print("Missing:", path)


## 9) GBD anemia YLDs

In [None]:
# Global anemia YLD rates ("All ages", 2020-2023): heatmap with one row per
# year and one column per sex.
path = DATA_RAW / "IHME-GBD_2023_DATA-anemia-prevalence-ylds-1.csv"
usecols = [
    "location_name",
    "sex_name",
    "age_name",
    "cause_name",
    "rei_name",
    "measure_name",
    "metric_name",
    "year",
    "val",
]
filters = {
    "location_name": "Global",
    "rei_name": "Anemia",
    "measure_name": "YLDs (Years Lived with Disability)",
    "metric_name": "Rate",
    "age_name": "All ages",
    "year": ["2020", "2021", "2022", "2023"],
}
if path.exists():
    df = read_filtered_csv(path, usecols, filters)
    df["val"] = pd.to_numeric(df["val"], errors="coerce")
    df["year"] = pd.to_numeric(df["year"], errors="coerce")
    df = df.dropna(subset=["val", "year"])
    if not df.empty:
        # Average any duplicate (year, sex) rows into a single cell.
        pivot = df.pivot_table(index="year", columns="sex_name", values="val", aggfunc="mean")
        fig = px.imshow(
            pivot,
            color_continuous_scale="Viridis",
            title="Global anemia YLDs rate by sex (2020-2023)",
        )
        save_chart(fig, "gbd_anemia_heatmap")
        # A bare `fig` nested inside `if` is not echoed by the notebook;
        # render the figure explicitly.
        fig.show()
else:
    print("Missing:", path)


## 10) Done

If `SAVE_OUTPUTS=True`, your PNG/HTML files are written into `v0/assets/` and `v0/assets/html/`.
