In [None]:
import pandas as pd
import janitor
import forestplot as fp
import warnings

# NOTE(review): called without a category or message filter, so this hides
# every warning raised from here on (deprecation notices included) until the
# filters are changed. Consider narrowing it to the specific warnings that are
# known to be noise.
warnings.filterwarnings("ignore")

import matplotlib.pyplot as plt
from typing import Iterable, Optional


def save_mpl_fig(
    savepath: str, formats: Optional[Iterable[str]] = None, dpi: Optional[int] = None
) -> None:
    """Save the current matplotlib figure as pdf and png, plus optional extras.

    Every file shares the same stem ``savepath``; only the extension differs.
    A pdf and a png are always written. Any further formats requested through
    ``formats`` are written afterwards, each one at most once.

    Parameters
    ----------
    savepath: str
        Path and file stem to save to. No extension.
    formats: Array-like or str, optional
        Additional formats to save in, e.g. ``["svg", "eps"]``. A single
        format may be given as a plain string. A leading dot is ignored.
        'pdf' and 'png' are skipped here because they are always saved, so
        that the png written with ``dpi`` is not overwritten.
        Do a:
            plt.gcf().canvas.get_supported_filetypes()
        or:
            plt.gcf().canvas.get_supported_filetypes_grouped()
        To see the Matplotlib-supported file formats to save in.
        (Source: https://stackoverflow.com/a/15007393)
    dpi: int, optional
        DPI for saving in png. All other formats are saved with ``dpi=None``.

    Returns
    -------
    None
    """
    # Layout options shared by every output file.
    save_kwargs = dict(bbox_inches="tight", pad_inches=0)

    # Save pdf
    plt.savefig(f"{savepath}.pdf", dpi=None, **save_kwargs)

    # Save png (the only output that honours ``dpi``)
    plt.savefig(f"{savepath}.png", dpi=dpi, **save_kwargs)

    if not formats:
        return None

    # A bare string is itself an iterable of characters; treat it as one format.
    if isinstance(formats, str):
        formats = [formats]

    # Track what has been written so that duplicates, and the pdf/png saved
    # above, are not written a second time.
    already_saved = {"pdf", "png"}
    for fmt in formats:
        extension = fmt.lstrip(".")
        key = extension.lower()
        if not extension or key in already_saved:
            continue
        already_saved.add(key)
        plt.savefig(f"{savepath}.{extension}", dpi=None, **save_kwargs)
    return None


# ============================================================================
# Input files (paths are relative to the notebook's working directory).
# The `fp_` prefix is presumably short for "file path"; it is unrelated to the
# `fp` alias of the forestplot import.
# Stata file that is read with pd.read_stata to build the plotting table.
fp_estimates = "./data/pfas-tf-spresto.dta"
# CSV that is left-merged onto that table on the "pfas" column.
fp_completeness = "./data/pfas-plasma-spresto-completeness-tabulation.csv"

# ============================================================================
# PFAS identifiers. They serve as the names of the indicator columns that get
# collapsed into a single "pfas" column, and as the filter for the rows that
# are plotted.
pfas = [
    "pfhxs",
    "pfos_linear",
    "pfos_branched",
    "pfoa_linear",
    "pfna",
    "pfhps",
    "pfda",
    "pfhpa",
]

In [None]:
# Build the table behind the forest plot: estimates read from the Stata file,
# one row-level "pfas" label, and the completeness figures merged in.
df = (
    pd.read_stata(fp_estimates)
    # NOTE(review): this fills NaN with 0 in every column that was read, not
    # only in the indicator columns -- confirm no estimate column relies on NaN.
    .fillna(0)
    # Collapse the indicator columns named in `pfas` into one "pfas" column
    # holding the name of the indicator that is set, then drop the indicators.
    .assign(pfas=lambda frame: pd.from_dummies(frame[pfas]))
    .drop(columns=pfas)
    # Bring in the completeness tabulation; validate guards against duplicate
    # "pfas" keys on either side of the join.
    .merge(pd.read_csv(fp_completeness), on="pfas", how="left", validate="1:1")
    # Turn the count columns into text and left-pad them to a fixed width so
    # the values line up in the plot's table.
    .assign(
        measured=lambda frame: frame["measured"].apply(str).str.pad(len("  n")),
        LOD=lambda frame: frame["LOD"]
        .apply(str)
        .str.pad(len("< loq"), side="left"),
    )
)
# Keep a copy of the formatted table next to the input data.
df.to_csv("./data/coefplot-pfas-transport-facilities-spresto.csv", index=False)

df

In [None]:
# Keyword arguments for the forest plot, collected in one place.
_opts = {
    # Columns with the point estimate and its lower / upper interval limits
    "estimate": "beta",
    "ll": "l95",
    "hl": "u95",
    # Row labels and p-values
    "varlabel": "analyte_code",
    "pval": "pval",
    # Columns shown to the left of the plot, with their headers
    "annote": ["full_analyte_name", "measured", "LOD", "lod"],
    "annoteheaders": [
        "Full analyte name",
        "  N",
        "< LOD",
        "LOD value",
    ],
    # Columns shown to the right of the plot, with their headers
    "rightannote": ["est_ci", "formatted_pval"],
    "right_annoteheaders": ["Est. (95% Conf. Int.)", "P-value"],
    "table": True,
    "variable_header": "PFAS",
    "decimal_precision": 3,
    "xlabel": "Standardized estimate",
    "xlabel_size": 17,
    "xticks": [-0.1, 0, 0.1, 0.2, 0.3],
    # Superscript letters used as the significance symbols
    "symbols": ["$^a$", "$^b$", "$^c$"],
    "marker": "o",  # circle marker
    "markersize": 120,  # marker size
    "xlinestyle": (0, (10, 5)),  # long dashes for the x-reference line
    "xlinecolor": "#808080",  # gray x-reference line
    "xtick_size": 18,  # x tick label font size
    "fontsize": 20,
}

# Plot only the rows whose "pfas" value is in the `pfas` list, then save the figure.
fp.forestplot(df.query("pfas in @pfas"), **_opts)
save_mpl_fig("./figures/coefplot-8pfas-transport-facilities-spresto")