# 10) Paper 1 — Conformal Prediction + Robust Optimization

Notebook de soporte para el paper:
- **Objetivo**: materializar tablas y figuras publicables de la frontera robusta + benchmark conformal.
- **Salidas**: `reports/paper_material/paper1/figures/` y `reports/paper_material/paper1/tables/`.

Estructura alineada al paper (IMRaD+):
1. Setup y datos
2. Métricas clave
3. Figuras principales
4. Tablas exportables
5. Notas de validez y reproducibilidad

In [None]:
from __future__ import annotations

import json
from pathlib import Path

import numpy as np
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
import plotly.io as pio

# Set "plotly_white" as plotly's default template.
pio.templates.default = "plotly_white"

# Locate the project root: when the working directory is named "notebooks"
# the root is its parent, otherwise the working directory itself is the root.
if Path.cwd().name == "notebooks":
    PROJECT_ROOT = Path.cwd().resolve().parent
else:
    PROJECT_ROOT = Path.cwd().resolve()

# Input folders, both anchored at the project root.
DATA_DIR = PROJECT_ROOT.joinpath("data", "processed")
MODEL_DIR = PROJECT_ROOT.joinpath("models")


def load_parquet(name: str) -> pd.DataFrame:
    """Read ``<name>.parquet`` from ``DATA_DIR`` and return it as a DataFrame."""
    parquet_path = DATA_DIR / f"{name}.parquet"
    return pd.read_parquet(parquet_path)


def load_json(name: str, from_models: bool = False) -> dict:
    """Load ``<name>.json`` and return its parsed content.

    Args:
        name: File stem, without the ``.json`` suffix.
        from_models: Read from ``MODEL_DIR`` when true, otherwise from
            ``DATA_DIR``.

    Returns:
        The value produced by ``json.loads`` for the file's text.
    """
    base = MODEL_DIR if from_models else DATA_DIR
    # Decode explicitly as UTF-8 so parsing does not depend on the platform's
    # locale encoding.
    return json.loads((base / f"{name}.json").read_text(encoding="utf-8"))


def ensure_dirs(base: Path) -> dict[str, Path]:
    """Create the output folder layout under ``base`` and return it.

    Returns a mapping with the keys ``"base"``, ``"fig"`` (``base/figures``)
    and ``"tbl"`` (``base/tables``). Missing folders, including parents, are
    created; folders that already exist are left alone.
    """
    subfolders = {"fig": "figures", "tbl": "tables"}
    layout = {
        "base": base,
        **{key: base / folder for key, folder in subfolders.items()},
    }
    for target in layout.values():
        target.mkdir(parents=True, exist_ok=True)
    return layout


def export_figure(fig: go.Figure, stem: str, out_fig_dir: Path) -> None:
    """Write ``fig`` as ``<stem>.html`` and, best-effort, as ``<stem>.png``.

    The HTML file is always written first. If the PNG export raises, the
    error is reported on stdout and swallowed so the HTML output still counts.
    """
    html_target = out_fig_dir / f"{stem}.html"
    fig.write_html(html_target)

    png_target = out_fig_dir / f"{stem}.png"
    try:
        fig.write_image(png_target, width=1400, height=850, scale=2)
        print(f"Saved: {html_target} and {png_target}")
    except Exception as exc:  # noqa: BLE001
        # Deliberately broad: any failure of the static export only skips the PNG.
        print(f"Saved HTML only ({html_target}). PNG skipped: {exc}")


def export_table(
    df: pd.DataFrame,
    stem: str,
    out_tbl_dir: Path,
    max_rows: int = 2000,
    *,
    escape: bool = True,
) -> None:
    """Export the leading rows of ``df`` as CSV and, best-effort, as LaTeX.

    Args:
        df: Table to export; it is not modified.
        stem: File name without extension; ``<stem>.csv`` and ``<stem>.tex``
            are written.
        out_tbl_dir: Directory that receives both files.
        max_rows: Number of leading rows kept (passed to ``DataFrame.head``).
        escape: Forwarded to ``DataFrame.to_latex``. Defaults to True so that
            LaTeX special characters such as ``_`` in column names and cell
            values are escaped; pass False for tables that already contain
            LaTeX markup.

    The CSV is always written. If the LaTeX export raises, the error is
    reported on stdout and swallowed.
    """
    csv_path = out_tbl_dir / f"{stem}.csv"
    tex_path = out_tbl_dir / f"{stem}.tex"
    # Nothing below mutates the frame, so no copy of the full table is needed.
    out_df = df.head(max_rows)
    if len(out_df) < len(df):
        # Make truncation visible instead of silently dropping rows.
        print(f"Note: {stem} truncated to {len(out_df)} of {len(df)} rows")
    out_df.to_csv(csv_path, index=False)
    try:
        latex = out_df.to_latex(index=False, escape=escape)
        tex_path.write_text(latex, encoding="utf-8")
        print(f"Saved: {csv_path} and {tex_path}")
    except Exception as exc:  # noqa: BLE001
        # Deliberately broad: any failure of the LaTeX export only skips the .tex file.
        print(f"Saved CSV only ({csv_path}). LaTeX skipped: {exc}")

In [None]:
# Create the output folders for this paper's figures and tables.
out = ensure_dirs(base=PROJECT_ROOT / "reports" / "paper_material" / "paper1")

# JSON summaries.
pipeline_summary = load_json(name="pipeline_summary")
model_comparison = load_json(name="model_comparison")
conformal_status = load_json(name="conformal_policy_status", from_models=True)

# Tabular inputs.
robust_summary = load_parquet(name="portfolio_robustness_summary")
robust_frontier = load_parquet(name="portfolio_robustness_frontier")
variant_benchmark = load_parquet(name="conformal_variant_benchmark")
variant_by_group = load_parquet(name="conformal_variant_benchmark_by_group")

# Quick sanity check on what was loaded.
print("Loaded shapes:")
print(f"robust_summary {robust_summary.shape}")
print(f"robust_frontier {robust_frontier.shape}")
print(f"variant_benchmark {variant_benchmark.shape}")
print(f"variant_by_group {variant_by_group.shape}")

In [None]:
# Sub-dictionaries the key metrics are read from; missing sections fall back to {}.
pipeline = pipeline_summary.get("pipeline", {})
pd_metrics = model_comparison.get("final_test_metrics", {})

# One row per metric: (label in the table, source mapping, key in that mapping).
# Keys that are absent from their source become NaN.
metrics = pd.DataFrame(
    [
        {"metric": label, "value": source.get(key, np.nan)}
        for label, source, key in (
            ("pd_auc", pd_metrics, "auc_roc"),
            ("coverage_90", conformal_status, "coverage_90"),
            ("coverage_95", conformal_status, "coverage_95"),
            ("avg_width_90", conformal_status, "avg_width_90"),
            ("robust_return", pipeline, "robust_return"),
            ("nonrobust_return", pipeline, "nonrobust_return"),
            ("price_of_robustness", pipeline, "price_of_robustness"),
        )
    ]
)
metrics

In [None]:
# Figure 1: robust vs non-robust return by risk tolerance
# Reshape to long format (one row per risk_tolerance/policy pair) so both
# return columns can be drawn as grouped bars.
plot_df = robust_summary.melt(
    id_vars=["risk_tolerance"],
    value_vars=["baseline_nonrobust_return", "best_robust_return"],
    var_name="policy",
    value_name="return_net",
)
fig1 = px.bar(
    plot_df,
    x="risk_tolerance",
    y="return_net",
    color="policy",
    barmode="group",
    title="Paper1-Fig1: Net Return by Risk Tolerance",
)
export_figure(fig1, "paper1_fig1_return_by_tolerance", out["fig"])
# Keep the figure as the final expression: a notebook only auto-displays the
# last expression of a cell, so it must come after the export call to render.
fig1

In [None]:
# Figure 2: price of robustness (%)
fig2 = px.line(
    robust_summary,
    x="risk_tolerance",
    y="price_of_robustness_pct",
    markers=True,
    title="Paper1-Fig2: Price of Robustness (%)",
)
export_figure(fig2, "paper1_fig2_price_of_robustness_pct", out["fig"])
# Keep the figure as the final expression: a notebook only auto-displays the
# last expression of a cell, so it must come after the export call to render.
fig2

In [None]:
# Figure 3: conformal benchmark trade-off
# One marker per benchmark row: interval width on x, minimum group coverage on
# y, colored by variant and sized by the "coverage" column.
fig3 = px.scatter(
    variant_benchmark,
    x="avg_width",
    y="min_group_coverage",
    color="variant",
    size="coverage",
    hover_data=["coverage_gap", "std_group_coverage"],
    title="Paper1-Fig3: Width vs Min Group Coverage",
)
export_figure(fig3, "paper1_fig3_conformal_tradeoff", out["fig"])
# Keep the figure as the final expression: a notebook only auto-displays the
# last expression of a cell, so it must come after the export call to render.
fig3

In [None]:
# Export core tables for manuscript
# Main-text tables (0-2).
export_table(df=metrics, stem="paper1_table0_key_metrics", out_tbl_dir=out["tbl"])
export_table(df=robust_summary, stem="paper1_table1_robustness_summary", out_tbl_dir=out["tbl"])
export_table(
    df=variant_benchmark,
    stem="paper1_table2_conformal_variant_benchmark",
    out_tbl_dir=out["tbl"],
)
# Appendix tables (A1-A2); the frontier gets a larger row cap than the default.
export_table(df=variant_by_group, stem="paper1_tableA1_benchmark_by_group", out_tbl_dir=out["tbl"])
export_table(
    df=robust_frontier,
    stem="paper1_tableA2_robustness_frontier",
    out_tbl_dir=out["tbl"],
    max_rows=5000,
)

## Threats to Validity (draft)
- Sensibilidad a las restricciones de optimización y a la configuración de penalizaciones.
- Riesgo de extrapolación fuera del dominio de Lending Club.
- Cobertura conformal y utilidad económica no son equivalentes; se deben reportar ambos ejes.

## Reproducibilidad
```bash
uv run dvc repro generate_conformal benchmark_conformal_variants optimize_portfolio optimize_portfolio_tradeoff
uv run pytest -q
```