In [1]:
import pandas as pd
import numpy as np
import seaborn as sns
import re
from matplotlib.pyplot import subplots, style, rc
from tqdm import tqdm
from venn import venn, pseudovenn
from collections import defaultdict
from itertools import count, islice
from functools import lru_cache

In [46]:
# LaTeX boilerplate emitted before the generated tabular environment.
TEX_HEADER = " ".join([r"\begin{samepage}", r"\begin{table}[h!]", r"\small"])
# LaTeX boilerplate emitted after the tabular; \caption{} and \label{} are written empty.
TEX_FOOTER = "\n".join([r"\caption{}", r"\label{}", r"\end{table}", r"\end{samepage}"])

In [52]:
def convert_tsv(filename, arm):
    """Read a tab-separated table and render its numeric columns as strings.

    The file is parsed with a two-row header (``"#"`` is passed to pandas as
    the escape character). Second-level header labels equal to ``"monomer"``
    or starting with ``"Unnamed"`` are replaced by ``"."``.

    Only rows in which at least one of the columns at positions 1-7 is
    ``>= 0.01`` are kept. The remaining values are formatted by position:

    * columns 1-7: multiplied by 100 and printed with one decimal; values
      below 0.0005 are printed as ``"<0.1"``;
    * columns 8-14: printed with four decimals;
    * column 15: printed in scientific notation with two decimals.

    Parameters
    ----------
    filename
        Anything ``pandas.read_csv`` accepts as its first argument.
    arm
        Value stored in every row of a new leading ``("Arm", "")`` column.

    Returns
    -------
    pandas.DataFrame
        A new object-dtype frame holding the filtered, string-formatted rows.
    """
    def as_percent(x):
        # Non-negative values under 0.0005 would otherwise print as "0.0".
        return "<0.1" if x < .0005 else format(100*x, ".1f")

    def as_fixed(x):
        return format(x, ".4f")

    def as_scientific(x):
        return format(x, ".2e")

    raw = pd.read_csv(filename, sep="\t", escapechar="#", header=[0, 1])
    raw.columns = pd.MultiIndex.from_tuples([
        (top, bottom if ((bottom != "monomer") and (not bottom.startswith("Unnamed"))) else ".")
        for top, bottom in list(raw.columns)
    ])
    kept = raw[(raw.iloc[:, 1:8] >= .01).any(axis=1)]
    # Cast to object once: this yields an independent copy of the filtered
    # rows (no writes into a slice of `raw`) whose columns can hold strings,
    # so the assignments below do not depend on pandas silently upcasting
    # float64 columns during an in-place setitem.
    tsv = kept.astype(object)
    for i in range(1, 8):
        tsv.iloc[:, i] = kept.iloc[:, i].apply(as_percent)
    for i in range(8, 15):
        tsv.iloc[:, i] = kept.iloc[:, i].apply(as_fixed)
    tsv.iloc[:, 15] = kept.iloc[:, 15].apply(as_scientific)
    tsv.insert(loc=0, column=("Arm", ""), value=arm)
    return tsv

# Stack the q-arm table on top of the p-arm table; each is tagged with its arm.
preformatted = pd.concat(
    objs=[
        convert_tsv("PacBio/repeatfinder-{}_arm.tsv".format(arm), arm=arm)
        for arm in ("q", "p")
    ],
    axis=0, sort=False,
)

# Write the table as LaTeX, wrapped in the shared header/footer boilerplate.
with open("Table-1-repeatfinder.tex", mode="wt") as tex:
    tabular = preformatted.to_latex(index=False).rstrip("\n")
    # Replace the booktabs rules emitted by to_latex with plain \hline.
    for rule in (r'\toprule', r'\midrule', r'\bottomrule'):
        tabular = tabular.replace(rule, r'\hline')
    print(TEX_HEADER, tabular, TEX_FOOTER, sep="\n", file=tex)

\begin{samepage} \begin{table}[h!] \small
\begin{tabular}{lllllllllllllllll}
\hline
arm &        monomer & \multicolumn{7}{l}{fraction\_explained} & \multicolumn{7}{l}{score} & p\_adjusted \\
    &              . &              HG001 & HG002 & HG003 & HG004 & HG005 & HG006 & HG007 &   HG001 &   HG002 &   HG003 &   HG004 &   HG005 &   HG006 &   HG007 &          . \\
\hline
  q &         TTAGGG &               74.5 &  82.5 &  80.1 &  81.7 &  75.7 &  77.5 &  62.2 &  0.6295 &  0.7126 &  0.6255 &  0.6497 &  0.6113 &  0.5988 &  0.4550 &  9.51e-113 \\
  q &         TTGGGG &                2.5 &   3.4 &   2.8 &   2.8 &   2.4 &   3.1 &   6.6 &  0.0158 &  0.0229 &  0.0175 &  0.0179 &  0.0155 &  0.0197 &  0.0434 &   4.04e-58 \\
  q &        TTAGGGG &                4.6 &   4.8 &   7.2 &   6.0 &   5.1 &   7.6 &   9.0 &  0.0152 &  0.0166 &  0.0200 &  0.0163 &  0.0161 &  0.0232 &  0.0279 &  4.22e-110 \\
  q &         TGAGGG &                1.9 &   2.5 &   1.7 &   2.0 &   3.6 &   2.9 &   4.1 &  0.01