In [1]:
import openml
import pandas as pd

In [4]:
def to_latex(suite_id, maximum_dataset_name_length=None, first_caption=None, second_caption="auto", label=None, filename=None):
    """Write a LaTeX ``longtable`` listing the tasks of an OpenML suite.

    The suite's tasks and the metadata of their datasets are fetched through
    ``openml``, rendered with the pandas Styler, post-processed into a
    ``longtable`` with a repeating header, and written to ``filename``.
    The generated markup uses ``longtable`` plus booktabs-style rule commands
    (``toprule``, ``midrule``, ``bottomrule``, ``addlinespace``).

    Parameters
    ----------
    suite_id
        Identifier handed to ``openml.study.get_suite``; also used for the
        default output filename.
    maximum_dataset_name_length : int, optional
        When truthy, dataset names longer than this are cut and suffixed with
        ``...`` so the result has exactly this length (assumes a value >= 3).
    first_caption : str, optional
        Caption emitted before the first header. Omitted when falsy.
    second_caption : str, optional
        Starred caption emitted before the repeated (continuation) header.
        The default ``"auto"`` derives it from ``first_caption`` by appending
        "(continued)" (keeping a trailing period at the end); if there is no
        ``first_caption`` no continuation caption is emitted.
    label : str, optional
        LaTeX label for the table. Omitted when falsy.
    filename : str, optional
        Output path; defaults to ``suite-<suite_id>.tex``.

    Returns
    -------
    None
        The table is written to disk as a side effect.
    """
    if second_caption == "auto":
        if not first_caption:
            # Nothing to derive from; previously this crashed on None.endswith.
            second_caption = None
        elif first_caption.endswith("."):
            second_caption = first_caption[:-1] + " (continued)."
        else:
            second_caption = first_caption + " (continued)"

    suite = openml.study.get_suite(suite_id)
    tasks = [openml.tasks.get_task(tid, download_data=False, download_qualities=False) for tid in suite.tasks]

    metadata = openml.datasets.list_datasets(data_id=[t.dataset_id for t in tasks], output_format="dataframe")
    # Attach the task id to every dataset row via the shared dataset id.
    task_data = pd.DataFrame([[t.id, t.dataset_id] for t in tasks], columns=["tid", "did"]).set_index("did")
    metadata = metadata.join(task_data, on="did")

    # Prepare fields for presentation
    metadata = metadata.rename(columns=dict(
        NumberOfInstances="instances",
        NumberOfFeatures="features",
        NumberOfClasses="classes",
        did="Dataset ID",
        tid="Task ID",
    ))
    count_columns = ["instances", "features", "classes"]
    metadata[count_columns] = metadata[count_columns].astype(int)

    columns_to_show = ["Task ID", "name", "instances", "features"]
    if "MinorityClassSize" in metadata:
        metadata["class ratio"] = metadata["MinorityClassSize"] / metadata["MajorityClassSize"]
        columns_to_show.extend(["classes", "class ratio"])
    # Case-insensitive alphabetical order (sorted before names are shortened).
    metadata = metadata.sort_values("name", key=lambda n: n.str.lower())

    if maximum_dataset_name_length:
        # `<=`: a name that already fits exactly must not be truncated.
        metadata["name"] = metadata["name"].apply(
            lambda name: name
            if len(name) <= maximum_dataset_name_length
            else name[:maximum_dataset_name_length - 3] + "..."
        )

    styler = metadata[columns_to_show].style
    styler = styler.format({"class ratio": '{:,.2f}'.format})
    styler = styler.hide(axis="index")

    latex = styler.to_latex()
    # Raw string: "\_" in a normal literal is an invalid escape sequence.
    latex = latex.replace("_", r"\_")
    latex = latex.replace("begin{tabular}", "begin{longtable}")
    latex = latex.replace("end{tabular}", "end{longtable}")

    if "class ratio" in columns_to_show:
        # Break the long header over two lines; that needs a paragraph column.
        latex = latex.replace(r"class ratio", r"class \newline ratio")
        latex = latex.replace(r"rlrrrr", r"rlrrrp{2em}")

    # Add a repeating header.
    # Relies on the Styler output being: begin line, header row, data rows, end line.
    start, header, *rows, end = latex.splitlines()
    # Insert back-to-front so earlier insert positions are not shifted.
    for i in reversed(range(0, len(rows), 5)):
        rows.insert(i, r"\addlinespace")

    table_header = [
        r"\toprule",
        header,
        r"\midrule",
        r"\midrule",
    ]

    # Optional parts are appended only when present, so the output never
    # contains blank lines inside the longtable preamble.
    lines = [start]
    if first_caption:
        lines.append(r"\caption{{{}}}".format(first_caption))
    if label:
        lines.append(r"\label{{{}}}".format(label))
    if first_caption or label:
        lines.append(r"\\")
    lines.extend(table_header)
    lines.append(r"\endfirsthead")

    if second_caption:
        lines.append(r"\caption*{{{}}}\\".format(second_caption))
    lines.extend(table_header)
    lines.append(r"\endhead")

    lines.extend(rows)
    lines.append(r"\bottomrule")
    lines.append(end)

    filename = filename or f"suite-{suite_id}.tex"
    # Explicit encoding so non-ASCII dataset names do not depend on the platform default.
    with open(filename, "w", encoding="utf-8") as fh:
        fh.write("\n".join(lines))

# Generate one table file per benchmark suite (written to suite-<id>.tex).
to_latex(269, maximum_dataset_name_length=25, first_caption="Tasks in the AutoML regression suite.", label="tab:269")
to_latex(271, maximum_dataset_name_length=25, first_caption="Tasks in the AutoML classification suite.", label="tab:271")
# Caption previously read "Tasks OpenML-CC18." (missing preposition).
to_latex(99, maximum_dataset_name_length=25, first_caption="Tasks in OpenML-CC18.", label="tab:cc18")

In [73]:
# Reference: hand-written LaTeX layout the generated tables were modelled on.
# Kept as Python comments: a leading `%` is parsed by IPython as a line magic
# and makes the cell fail with a UsageError.
#
# %\begin{adjustbox}{center, width=8cm, totalheight=10cm}
# % \begin{longtable}{rrlrrrrr}
# % \toprule
# % \textbf{ Data id } & \textbf{Task id} & \textbf{ Name } & \textbf{ cl } & \textbf{ p } & \textbf{ n } & \textbf{class ratio}\\
# % \midrule

# % \midrule
# % 3 & 3 & kr-vs-kp & 2 & 37 & 3196 & 0.91\\
# % 6 & 6 & letter & 26 & 17 & 20000 & 0.90\\
# % 11 & 11 & balance-scale & 3 & 5 & 625 & 0.17\\
# % 12 & 12 & mfeat-factors & 10 & 217 & 2000 & 1.00\\
# % 14 & 14 & mfeat-fourier & 10 & 77 & 2000 & 1.00\\
# % \addlinespace
