In [None]:
import re
from cogent3 import load_table, make_table, open_data_store
from mdeq.utils import load_from_sqldb
from project_paths import TABLE_DIR, DATA_DIR, RESULT_DIR

_num = re.compile(r"\d+")


def get_pvalues(path):
    """Load the table at `path` and return its rank and bootstrap p-value columns.

    The rank is the first run of digits found in each entry of the "name"
    column; it is kept as a string.
    """
    pvals = load_table(path)
    ranks = []
    for label in pvals.columns["name"]:
        digit_runs = _num.findall(label)
        ranks.append(digit_runs[0])
    pvals.columns["rank"] = ranks
    return pvals[:, ["rank", "bootstrap_pval"]]


def get_delta_nabla(path):
    """Load the table at `path` and return its rank, delta_nabla and std columns.

    The rank is the first run of digits found in each entry of the "source"
    column; it is kept as a string.
    """
    stats = load_table(path)
    sources = stats.columns["source"]
    stats.columns["rank"] = [_num.findall(src)[0] for src in sources]
    wanted = ["rank", "delta_nabla", "std"]
    return stats[:, wanted]


def get_alignment_lengths(path, seqname="mmu"):
    """Tabulate the length of one named sequence for every member of a data store.

    Parameters
    ----------
    path
        location of the data store to open
    seqname
        key looked up in the result of ``get_lengths()`` for each loaded
        member; defaults to "mmu", the value that was previously hard-coded

    Returns
    -------
    A table with the columns "rank" and "length". The rank is the first run
    of digits in each member's ``unique_id``, kept as a string.
    """
    dstore = open_data_store(path)
    loader = load_from_sqldb()
    # one [identifier, length] row per data store member
    rows = [[m.unique_id, loader(m).get_lengths()[seqname]] for m in dstore]

    table = make_table(header=["name", "length"], data=rows)
    table.columns["rank"] = [_num.findall(v)[0] for v in table.columns["name"]]
    return table[:, ["rank", "length"]]


def merged(pvals, delta_nabla, align_lengths):
    """Inner-join the three tables on "rank" and return the result sorted by rank.

    Sets ``index_name`` to "rank" on each of the input tables. Columns of the
    joined table whose names start with "right" have the text "right_"
    removed from their names.
    """
    for tbl in (pvals, delta_nabla, align_lengths):
        tbl.index_name = "rank"

    joined = pvals.inner_join(delta_nabla).inner_join(align_lengths, digits=2)
    prefixed = [name for name in joined.columns if name.startswith("right")]
    stripped = [name.replace("right_", "") for name in prefixed]
    renamed = joined.with_new_header(prefixed, stripped)
    return renamed.sorted(columns="rank")


def _format_element(x):
    return f"{x:.2f}" if type(x) == float else x


def make_latex_table(table):
    """Build the table for LaTeX output, with one column per intron rank.

    The "length" column of `table` is overwritten in place with
    comma-grouped strings before the table is transposed. Float elements of
    the transposed table are converted to two-decimal strings, and the first
    column is replaced by LaTeX row labels.
    """
    lengths = table.columns["length"]
    table.columns["length"] = [f"{value:,}" for value in lengths]

    by_rank = table.transposed("", select_as_header="rank", digits=2)
    for name, values in by_rank.columns.items():
        by_rank.columns[name] = [_format_element(value) for value in values]

    row_labels = [
        r"$\hat{p}$-value",
        r"$\hat\delta_{\nabla}$",
        r"$\hat\sigma_\nabla$",
        "length",
    ]
    title = r"The magnitude of mutation disequilibrium is higher in the region of the \emph{Fxy} gene within the PAR."
    legend = r"TOE $\hat{p}$-value and $\hat\delta_{\nabla}$ for the first six 5'- introns of \emph{M. musculus}. Column title is the \emph{M. musculus} intron rank. Data is from alignments of \emph{M. musculus}, \emph{M. spretus}, and \emph{R. norvegicus}. \emph{M. musculus} was treated as the foreground branch in model fitting. $\hat\sigma_\nabla$ is the estimated standard deviation of $\nabla$ from the null distribution. The length of each \emph{M. musculus} intron is presented."

    data = by_rank.columns.to_dict()
    data[""] = row_labels
    return make_table(data=data, title=title, legend=legend)


In [None]:
# Report whether the alignment database exists before trying to read it.
aln_db_path = DATA_DIR / "fxy" / "introns-aligned-filtered.sqlitedb"
print(aln_db_path.exists(), str(aln_db_path))

# Result files for the two analyses that get merged with the lengths.
toe_path = RESULT_DIR / "fxy" / "toe/toe-fxy-intron-mmu.tsv"
convergence_path = RESULT_DIR / "fxy" / "convergence/convergence-fxy-intron-mmu.tsv"

align_lengths = get_alignment_lengths(aln_db_path)
pvalues = get_pvalues(toe_path)
dnabla = get_delta_nabla(convergence_path)
joint = merged(pvalues, dnabla, align_lengths)
joint

In [None]:
# Reshape the merged table for LaTeX output; note that make_latex_table
# overwrites the "length" column of `joint` in place.
m = make_latex_table(joint)

In [None]:
# Render the table as LaTeX and save it alongside the other tables.
outfile = TABLE_DIR / "fxy.tex"
latex = m.to_latex(label="tab:fxy")
outfile.write_text(latex)