In [1]:
import pandas as pd

In [9]:
# give me a random pd table
import pandas as pd
import numpy as np

# Create a random pandas DataFrame
data = {
    'Name': ['Alice', 'Bob', 'Charlie', 'David', 'Eve'],
    'Age': [25, 30, 35, 40, 45],
    'Salary': [50000, 55000, 60000, 65000, 70000]
}



In [10]:
# put data into a pd dataframe

data = pd.DataFrame(data)

In [11]:
data

Unnamed: 0,Name,Age,Salary
0,Alice,25,50000
1,Bob,30,55000
2,Charlie,35,60000
3,David,40,65000
4,Eve,45,70000


In [9]:
data.to_latex()

'\\begin{tabular}{llrr}\n\\toprule\n & Name & Age & Salary \\\\\n\\midrule\n0 & Alice & 25 & 50000 \\\\\n1 & Bob & 30 & 55000 \\\\\n2 & Charlie & 35 & 60000 \\\\\n3 & David & 40 & 65000 \\\\\n4 & Eve & 45 & 70000 \\\\\n\\bottomrule\n\\end{tabular}\n'

In [12]:
print(data.to_latex())

\begin{tabular}{llrr}
\toprule
 & Name & Age & Salary \\
\midrule
0 & Alice & 25 & 50000 \\
1 & Bob & 30 & 55000 \\
2 & Charlie & 35 & 60000 \\
3 & David & 40 & 65000 \\
4 & Eve & 45 & 70000 \\
\bottomrule
\end{tabular}



In [3]:
import re

def beautify_latex_table(
    latex_code: str,
    caption_name: str = "Table Caption",
    midrule_lines: list = None,
    colored_rows: list = None,
) -> str:
    """
    Beautifies LaTeX code from DataFrame.to_latex() with added table environment,
    midrules, row coloring, and bold headers. Fixes the header formatting bug.

    Parameters:
    - latex_code (str): LaTeX string from DataFrame.to_latex()
    - caption_name (str): Caption text for the table
    - midrule_lines (list): List of row indices (0-based) after which to insert \midrule
    - colored_rows (list): List of tuples (row_index, color_name) for applying \rowcolor

    Returns:
    - str: Formatted LaTeX table string
    """
    midrule_lines = midrule_lines or []
    colored_rows = colored_rows or []

    lines = latex_code.strip().split("\n")

    # Extract header index
    header_idx = next(i for i, line in enumerate(lines) if '&' in line and '\\\\' in line)

    # Bold headers correctly (exclude \\ from bolding)
    header_line = lines[header_idx]
    match = re.match(r"(.*?\\\\)", header_line.strip())
    if match:
        content = match.group(1).replace('\\\\', '')
        headers = [f"\\textbf{{{col.strip()}}}" for col in content.split("&")]
        lines[header_idx] = " & ".join(headers) + " \\\\"

    # Insert midrules and row colors (process from bottom to top to avoid index shifts)
    data_start = header_idx + 2  # skip \midrule after header

    for row_idx, color in sorted(colored_rows, key=lambda x: -x[0]):
        insert_idx = data_start + row_idx
        lines.insert(insert_idx, f"\\rowcolor{{{color}}}")

    for mid_idx in sorted(midrule_lines, reverse=True):
        insert_idx = data_start + mid_idx
        lines.insert(insert_idx, "\\midrule")

    # Wrap with table environment
    table_env = [
        "\\begin{table}[h!]",
        "\\centering",
        "\\small",
        "\\setlength{\\tabcolsep}{4pt}",
        *lines,
        f"\\caption{{{caption_name}}}",
        "\\end{table}"
    ]

    return "\n".join(table_env)

In [27]:
latex_raw = data.to_latex(index=False)

beautified = beautify_latex_table(
    latex_code=latex_raw,
    caption_name="Generate Test Results.",
    midrule_lines=[2, 3],
    colored_rows=[(4, 'lightblue')]
)

print(beautified)

\begin{table}[h!]
\centering
\small
\setlength{\tabcolsep}{4pt}
\begin{tabular}{lrr}
\toprule
\textbf{Name} & \textbf{Age} & \textbf{Salary} \\
\midrule
Alice & 25 & 50000 \\
Bob & 30 & 55000 \\
\midrule
Charlie & 35 & 60000 \\
\midrule
David & 40 & 65000 \\
\rowcolor{lightblue}
Eve & 45 & 70000 \\
\bottomrule
\end{tabular}
\caption{Generate Test Results.}
\end{table}


In [28]:
print(beautify_latex_table(
    latex_code=latex_raw,
    caption_name="Generate Test Results.",
))

\begin{table}[h!]
\centering
\small
\setlength{\tabcolsep}{4pt}
\begin{tabular}{lrr}
\toprule
\textbf{Name} & \textbf{Age} & \textbf{Salary} \\
\midrule
Alice & 25 & 50000 \\
Bob & 30 & 55000 \\
Charlie & 35 & 60000 \\
David & 40 & 65000 \\
Eve & 45 & 70000 \\
\bottomrule
\end{tabular}
\caption{Generate Test Results.}
\end{table}


In [6]:
import pandas as pd

# Create a model performance comparison DataFrame
data = {
    'Model': ['GPT-4', 'Claude-3', 'Llama-2-70B', 'Mistral-7B', 'Gemini-Pro'],
    'Parameters': ['1.76T', 'Unknown', '70B', '7B', 'Unknown'],
    'MMLU': [86.4, 84.2, 68.9, 64.2, 79.1],
    'GSM8K': [92.0, 88.5, 56.8, 52.3, 85.7],
    'HumanEval': [67.0, 71.2, 29.9, 30.1, 63.8],
    'Average': [81.8, 81.3, 51.9, 48.9, 76.2]
}

# Create DataFrame
df = pd.DataFrame(data)

# Generate LaTeX table with beautification
latex_raw = df.to_latex(index=False)

beautified = beautify_latex_table(
    latex_code=latex_raw,
    caption_name="Model Performance Comparison Across Different Benchmarks",
    midrule_lines=[1, 3],  # Add lines after top 2 models and before last 2
    colored_rows=[(0, 'lightblue'), (1, 'lightgray')]  # Highlight top 2 models
)

print(beautified)

\begin{table}[h!]
\centering
\small
\setlength{\tabcolsep}{4pt}
\begin{tabular}{llrrrr}
\toprule
\textbf{Model} & \textbf{Parameters} & \textbf{MMLU} & \textbf{GSM8K} & \textbf{HumanEval} & \textbf{Average} \\
\midrule
\rowcolor{lightblue}
\midrule
GPT-4 & 1.76T & 86.400000 & 92.000000 & 67.000000 & 81.800000 \\
\rowcolor{lightgray}
\midrule
Claude-3 & Unknown & 84.200000 & 88.500000 & 71.200000 & 81.300000 \\
Llama-2-70B & 70B & 68.900000 & 56.800000 & 29.900000 & 51.900000 \\
Mistral-7B & 7B & 64.200000 & 52.300000 & 30.100000 & 48.900000 \\
Gemini-Pro & Unknown & 79.100000 & 85.700000 & 63.800000 & 76.200000 \\
\bottomrule
\end{tabular}
\caption{Model Performance Comparison Across Different Benchmarks}
\end{table}


In [8]:
# Ablation study table
ablation_data = {
    'Configuration': ['Full Model', 'w/o Attention', 'w/o Positional Encoding', 'w/o Layer Norm', 'w/o Residual Connections'],
    'BLEU': [34.2, 28.7, 31.5, 29.8, 26.3],
    'ROUGE-L': [42.8, 37.1, 39.6, 38.2, 35.7],
    'Perplexity': [12.4, 18.9, 15.7, 16.8, 21.2],
    'Training Time (hrs)': [24, 20, 22, 18, 16]
}

df_ablation = pd.DataFrame(ablation_data)

# Generate beautified LaTeX table
latex_raw_ablation = df_ablation.to_latex(index=False)

beautified_ablation = beautify_latex_table(
    latex_code=latex_raw_ablation,
    caption_name="Ablation Study Results on Translation Task",
    midrule_lines=[1],  # Separate full model from ablated versions
    colored_rows=[(0, 'lightblue')]  # Highlight the full model
)

print(beautified_ablation)

\begin{table}[h!]
\centering
\small
\setlength{\tabcolsep}{4pt}
\begin{tabular}{lrrrr}
\toprule
\textbf{Configuration} & \textbf{BLEU} & \textbf{ROUGE-L} & \textbf{Perplexity} & \textbf{Training Time (hrs)} \\
\midrule
\rowcolor{lightblue}
\midrule
Full Model & 34.200000 & 42.800000 & 12.400000 & 24 \\
w/o Attention & 28.700000 & 37.100000 & 18.900000 & 20 \\
w/o Positional Encoding & 31.500000 & 39.600000 & 15.700000 & 22 \\
w/o Layer Norm & 29.800000 & 38.200000 & 16.800000 & 18 \\
w/o Residual Connections & 26.300000 & 35.700000 & 21.200000 & 16 \\
\bottomrule
\end{tabular}
\caption{Ablation Study Results on Translation Task}
\end{table}


## Benchmark

In [None]:
import cs336_systems.benchmark as benchmark 

