In [1]:
import csv
import io
from collections import defaultdict

# Single-pass translation table for characters that are special in LaTeX text
# mode. Using str.translate avoids double-escaping (e.g. the backslash that
# escapes '&' is not itself re-escaped).
_LATEX_SPECIALS = str.maketrans({
    '\\': r'\textbackslash{}',
    '&': r'\&',
    '%': r'\%',
    '$': r'\$',
    '#': r'\#',
    '_': r'\_',
    '{': r'\{',
    '}': r'\}',
    '~': r'\textasciitilde{}',
    '^': r'\textasciicircum{}',
})


def generate_performance_table(csv_data, thread_columns=(1, 2, 4, 8)):
    """
    Parse CSV performance data and build a complete LaTeX document.

    The table is grouped by ProblemSize, then Algorithm, with one column per
    thread count showing ExecutionTimeMs. Non-converged runs are marked with
    an asterisk (*), and a missing (size, algorithm, threads) combination is
    rendered as '-'.

    Expected CSV columns, in order: Algorithm, ProblemSize, NumThreads,
    ExecutionTimeMs, Converged. The first line is treated as a header and
    skipped. Rows that are too short or whose numeric fields are not integers
    are reported on stdout and ignored; blank lines are ignored silently.
    Rows whose NumThreads is not listed in ``thread_columns`` are parsed but
    never displayed.

    Args:
        csv_data (str): A string containing the CSV data.
        thread_columns: Thread counts to show as table columns, in display
            order. Defaults to (1, 2, 4, 8).

    Returns:
        str: The complete LaTeX document source code, or the LaTeX comment
        "% Empty CSV data provided." when ``csv_data`` has no header line.

    Raises:
        ValueError: If ``thread_columns`` is empty.
    """
    thread_columns = list(thread_columns)
    if not thread_columns:
        raise ValueError("thread_columns must contain at least one thread count")
    n_cols = len(thread_columns)

    # 1. Parse and structure the data
    # results[problem_size][algorithm][num_threads] = (execution_time_ms, converged)
    results = defaultdict(lambda: defaultdict(dict))
    reader = csv.reader(io.StringIO(csv_data))

    # Skip the header row; an input with no lines at all has nothing to render.
    if next(reader, None) is None:
        return "% Empty CSV data provided."

    for row in reader:
        if not row:
            # csv.reader yields [] for blank lines; they are not data errors.
            continue
        try:
            algorithm = row[0]
            problem_size = int(row[1])
            num_threads = int(row[2])
            exec_time = int(row[3])
            # Anything other than "true" (case-insensitive) counts as not converged.
            converged = row[4].lower() == 'true'
        except (IndexError, ValueError) as e:
            print(f"Skipping malformed row: {row}. Error: {e}")
            continue
        # A later row for the same (size, algorithm, threads) overwrites an earlier one.
        results[problem_size][algorithm][num_threads] = (exec_time, converged)

    # 2. Problem sizes are shown in ascending numeric order
    sorted_problem_sizes = sorted(results)

    # 3. Document preamble and table header.
    # multirow must be loaded because the size cell below uses \multirow.
    latex_code = [
        r'\documentclass[10pt]{article}',
        r'\usepackage{booktabs}',
        r'\usepackage{multirow}',
        r'\usepackage{amsmath}',
        r'\usepackage[a4paper, margin=1in]{geometry}',
        r'\title{Algorithm Performance Benchmarks}',
        r'\author{}',
        r'\date{}',
        r'\begin{document}',
        r'\maketitle',
        r'',
        r'\section*{Execution Time by Problem Size and Thread Count}',
        r'',
        r'This table summarizes the execution time in milliseconds (ms) for various algorithms across different problem sizes and thread counts. Non-converged runs are marked with an asterisk ($^{*}$).',
        r'',
        r'\begin{table}[h!]',
        r'\centering',
        r'\caption{Parallel Performance Metrics (Execution Time in ms)}',
        r'\label{tab:performance_summary}',
        # Two left-aligned label columns followed by one centred column per thread count
        r'\begin{tabular}{@{}ll' + 'c' * n_cols + r'@{}}',
        r'\toprule',
        # Header row 1: a single heading spanning all thread columns
        r'Problem Size ($N$) & Algorithm & \multicolumn{' + str(n_cols)
        + r'}{c}{Execution Time (ms) by Number of Threads} \\',
        # Thread columns start at column 3 (after size and algorithm)
        r'\cmidrule(lr){3-' + str(n_cols + 2) + '}',
        # Header row 2: the individual thread counts
        ' & & ' + ' & '.join(str(t) for t in thread_columns) + r' \\',
        r'\midrule',
    ]

    # 4. Table rows: one group per problem size, one row per algorithm
    last_index = len(sorted_problem_sizes) - 1
    for i, size in enumerate(sorted_problem_sizes):
        by_algorithm = results[size]
        current_algorithms = sorted(by_algorithm)

        for j, alg in enumerate(current_algorithms):
            # The size label is printed once and spans every row of its group.
            if j == 0:
                size_cell = f'\\multirow{{{len(current_algorithms)}}}{{*}}{{{size}}}'
            else:
                size_cell = ''
            row_cells = [size_cell, alg.translate(_LATEX_SPECIALS)]

            runs = by_algorithm[alg]
            for threads in thread_columns:
                if threads in runs:
                    time_ms, converged = runs[threads]
                    time_str = str(time_ms)
                    if not converged:
                        time_str += r'$^*$'  # Mark non-converged runs
                else:
                    time_str = '-'  # No measurement for this thread count
                row_cells.append(time_str)

            latex_code.append(' & '.join(row_cells) + r' \\')

        # Separate groups with a rule; \bottomrule closes the last group.
        if i < last_index:
            latex_code.append(r'\midrule')

    # 5. Close the table and add the footnote explaining the asterisk
    latex_code.extend([
        r'\bottomrule',
        r'\end{tabular}',
        r'\begin{flushleft}',
        r'\footnotesize $^{*}$Execution did not converge within the timeout limit (60 seconds).',
        r'\end{flushleft}',
        r'\end{table}',
        r'',
        r'\end{document}',
    ])

    return '\n'.join(latex_code)

# --- CSV Data Definition ---
# Benchmark results embedded as a multiline string so the cell is self-contained.
# Columns (see the header line): Algorithm, ProblemSize, NumThreads,
# ExecutionTimeMs, Converged. Every row with Converged == false below has an
# ExecutionTimeMs slightly above 60000.
csv_input = """Algorithm,ProblemSize,NumThreads,ExecutionTimeMs,Converged
StableMarriage,10,1,79,true
StableMarriage,10,2,48,true
StableMarriage,10,4,43,true
StableMarriage,10,8,58,true
StableMarriage,50,1,51,true
StableMarriage,50,2,55,true
StableMarriage,50,4,57,true
StableMarriage,50,8,55,true
StableMarriage,100,1,51,true
StableMarriage,100,2,49,true
StableMarriage,100,4,54,true
StableMarriage,100,8,56,true
StableMarriage,500,1,2180,true
StableMarriage,500,2,1905,true
StableMarriage,500,4,828,true
StableMarriage,500,8,817,true
StableMarriage,10000,1,60023,false
StableMarriage,10000,2,60009,false
StableMarriage,10000,4,60041,false
StableMarriage,10000,8,60099,false
FastComponents,10,1,41,true
FastComponents,10,2,49,true
FastComponents,10,4,39,true
FastComponents,10,8,53,true
FastComponents,50,1,47,true
FastComponents,50,2,43,true
FastComponents,50,4,43,true
FastComponents,50,8,41,true
FastComponents,100,1,44,true
FastComponents,100,2,42,true
FastComponents,100,4,41,true
FastComponents,100,8,40,true
FastComponents,500,1,49,true
FastComponents,500,2,49,true
FastComponents,500,4,47,true
FastComponents,500,8,46,true
FastComponents,10000,1,1052,true
FastComponents,10000,2,996,true
FastComponents,10000,4,548,true
FastComponents,10000,8,674,true
BellmanFord,10,1,60,true
BellmanFord,10,2,56,true
BellmanFord,10,4,59,true
BellmanFord,10,8,56,true
BellmanFord,50,1,52,true
BellmanFord,50,2,55,true
BellmanFord,50,4,51,true
BellmanFord,50,8,53,true
BellmanFord,100,1,55,true
BellmanFord,100,2,51,true
BellmanFord,100,4,56,true
BellmanFord,100,8,49,true
BellmanFord,500,1,53,true
BellmanFord,500,2,43,true
BellmanFord,500,4,45,true
BellmanFord,500,8,48,true
BellmanFord,10000,1,122,true
BellmanFord,10000,2,74,true
BellmanFord,10000,4,65,true
BellmanFord,10000,8,68,true
Johnson,10,1,9049,true
Johnson,10,2,58,true
Johnson,10,4,40,true
Johnson,10,8,46,true
Johnson,50,1,23839,true
Johnson,50,2,47,true
Johnson,50,4,45,true
Johnson,50,8,41,true
Johnson,100,1,60044,false
Johnson,100,2,93,true
Johnson,100,4,51,true
Johnson,100,8,60,true
Johnson,500,1,60010,false
Johnson,500,2,99,true
Johnson,500,4,89,true
Johnson,500,8,100,true
Johnson,10000,1,60010,false
Johnson,10000,2,2201,true
Johnson,10000,4,818,true
Johnson,10000,8,344,true
Boruvka,10,1,89,true
Boruvka,10,2,97,true
Boruvka,10,4,73,true
Boruvka,10,8,86,true
Boruvka,50,1,97,true
Boruvka,50,2,90,true
Boruvka,50,4,83,true
Boruvka,50,8,99,true
Boruvka,100,1,92,true
Boruvka,100,2,96,true
Boruvka,100,4,90,true
Boruvka,100,8,95,true
Boruvka,500,1,89,true
Boruvka,500,2,90,true
Boruvka,500,4,88,true
Boruvka,500,8,83,true
Boruvka,10000,1,102,true
Boruvka,10000,2,95,true
Boruvka,10000,4,99,true
Boruvka,10000,8,103,true
"""

# Build the full LaTeX document from the embedded CSV
latex_output = generate_performance_table(csv_input)

# Write the LaTeX source to stdout (shown as the cell output)
print(latex_output)

\documentclass[10pt]{article}
\usepackage{booktabs}
\usepackage{amsmath}
\usepackage[a4paper, margin=1in]{geometry}
\title{Algorithm Performance Benchmarks}
\author{}
\date{}
\begin{document}
\maketitle

\section*{Execution Time by Problem Size and Thread Count}

This table summarizes the execution time in milliseconds (ms) for various algorithms across different problem sizes and thread counts. Non-converged runs are marked with an asterisk ($^{*}$).

\begin{table}[h!]
\centering
\caption{Parallel Performance Metrics (Execution Time in ms)}
\label{tab:performance_summary}
\begin{tabular}{@{}llcccc@{}}
\toprule
Problem Size ($N$) & Algorithm & \multicolumn{4}{c}{Execution Time (ms) by Number of Threads} \\
\cmidrule(lr){3-6}
 & & 1 & 2 & 4 & 8 \\
\midrule
\multirow{5}{*}{10} & BellmanFord & 60 & 56 & 59 & 56 \\
 & Boruvka & 89 & 97 & 73 & 86 \\
 & FastComponents & 41 & 49 & 39 & 53 \\
 & Johnson & 9049 & 58 & 40 & 46 \\
 & StableMarriage & 79 & 48 & 43 & 58 \\
\midrule
\multirow{5}{*}{

In [2]:
import csv
import io
from collections import defaultdict

def generate_performance_table(csv_data, thread_columns=(1, 2, 4, 8)):
    """
    Parse single-algorithm CSV performance data and build a LaTeX document.

    The table has one row per ProblemSize and one column per thread count,
    each cell showing ExecutionTimeMs. A missing (size, threads) combination
    is rendered as '-'. Non-converged runs are marked with an asterisk (*);
    a footnote explaining the marker is added only when at least one such
    run appears in the table.

    Expected CSV columns, in order: Algorithm, ProblemSize, NumThreads,
    ExecutionTimeMs, Converged. The first line is treated as a header and
    skipped. Rows that are too short or whose numeric fields are not integers
    are reported on stdout and ignored; blank lines are ignored silently.

    This version is intended for data containing a single algorithm: the
    Algorithm column is not displayed, and if several algorithms share a
    problem size only the one that appears first in the CSV is shown for it.

    Args:
        csv_data (str): A string containing the CSV data.
        thread_columns: Thread counts to show as table columns, in display
            order. Defaults to (1, 2, 4, 8).

    Returns:
        str: The complete LaTeX document source code, or the LaTeX comment
        "% Empty CSV data provided." when ``csv_data`` has no header line.

    Raises:
        ValueError: If ``thread_columns`` is empty.
    """
    thread_columns = list(thread_columns)
    if not thread_columns:
        raise ValueError("thread_columns must contain at least one thread count")
    n_cols = len(thread_columns)

    # 1. Parse and structure the data
    # results[problem_size][algorithm][num_threads] = (execution_time_ms, converged)
    results = defaultdict(lambda: defaultdict(dict))
    reader = csv.reader(io.StringIO(csv_data))

    # Skip the header row; an input with no lines at all has nothing to render.
    if next(reader, None) is None:
        return "% Empty CSV data provided."

    for row in reader:
        if not row:
            # csv.reader yields [] for blank lines; they are not data errors.
            continue
        try:
            algorithm = row[0]
            problem_size = int(row[1])
            num_threads = int(row[2])
            exec_time = int(row[3])
            # Anything other than "true" (case-insensitive) counts as not converged.
            converged = row[4].lower() == 'true'
        except (IndexError, ValueError) as e:
            print(f"Skipping malformed row: {row}. Error: {e}")
            continue
        results[problem_size][algorithm][num_threads] = (exec_time, converged)

    # 2. Problem sizes are shown in ascending numeric order
    sorted_problem_sizes = sorted(results)

    # 3. Document preamble and table header
    latex_code = [
        r'\documentclass[10pt]{article}',
        r'\usepackage{booktabs}',
        r'\usepackage{amsmath}',
        r'\usepackage[a4paper, margin=1in]{geometry}',
        r'\title{Scan Algorithm Performance}',
        r'\author{}',
        r'\date{}',
        r'\begin{document}',
        r'\maketitle',
        r'',
        r'\section*{Execution Time by Problem Size and Thread Count}',
        r'',
        r'This table summarizes the execution time in milliseconds (ms) for the parallel Scan algorithm, showing scaling efficiency with increasing thread counts ($P$).',
        r'',
        r'\begin{table}[h!]',
        r'\centering',
        r'\caption{Scan Algorithm Performance Metrics (Execution Time in ms)}',
        r'\label{tab:scan_performance}',
        # One left-aligned label column followed by one centred column per thread count
        r'\begin{tabular}{@{}l' + 'c' * n_cols + r'@{}}',
        r'\toprule',
        # Header row 1: a single heading spanning all thread columns
        r'Problem Size ($N$) & \multicolumn{' + str(n_cols)
        + r'}{c}{Execution Time (ms) by Number of Threads ($P$)} \\',
        # Thread columns start at column 2 (after the size label)
        r'\cmidrule(lr){2-' + str(n_cols + 1) + '}',
        # Header row 2: the individual thread counts
        ' & ' + ' & '.join(str(t) for t in thread_columns) + r' \\',
        r'\midrule',
    ]

    # 4. Table rows: one per problem size
    any_non_converged = False
    last_index = len(sorted_problem_sizes) - 1
    for i, size in enumerate(sorted_problem_sizes):
        # Every size key was created together with at least one algorithm entry,
        # so there is always a first algorithm to take (even if its name is '').
        alg = next(iter(results[size]))
        runs = results[size][alg]

        row_cells = [str(size)]
        for threads in thread_columns:
            if threads in runs:
                time_ms, converged = runs[threads]
                time_str = str(time_ms)
                if not converged:
                    time_str += r'$^*$'  # Mark non-converged runs
                    any_non_converged = True
            else:
                time_str = '-'  # No measurement for this thread count
            row_cells.append(time_str)

        latex_code.append(' & '.join(row_cells) + r' \\')

        # Separate rows with a rule; \bottomrule closes the last one.
        if i < last_index:
            latex_code.append(r'\midrule')

    # 5. Close the table; explain the asterisk only if one was actually emitted
    latex_code.extend([r'\bottomrule', r'\end{tabular}'])
    if any_non_converged:
        latex_code.extend([
            r'\begin{flushleft}',
            r'\footnotesize $^{*}$Execution did not converge.',
            r'\end{flushleft}',
        ])
    latex_code.extend([
        r'\end{table}',
        r'',
        r'\end{document}',
    ])

    return '\n'.join(latex_code)

# --- CSV Data Definition ---
# Benchmark results for the Scan algorithm, embedded as a multiline string so
# the cell is self-contained.
# Columns (see the header line): Algorithm, ProblemSize, NumThreads,
# ExecutionTimeMs, Converged. Every row below has Converged == true.
csv_input = """Algorithm,ProblemSize,NumThreads,ExecutionTimeMs,Converged
Scan,16,1,102,true
Scan,16,2,92,true
Scan,16,4,91,true
Scan,16,8,83,true
Scan,64,1,92,true
Scan,64,2,95,true
Scan,64,4,90,true
Scan,64,8,87,true
Scan,1024,1,93,true
Scan,1024,2,99,true
Scan,1024,4,99,true
Scan,1024,8,90,true
Scan,4096,1,104,true
Scan,4096,2,98,true
Scan,4096,4,100,true
Scan,4096,8,92,true
Scan,8192,1,97,true
Scan,8192,2,97,true
Scan,8192,4,92,true
Scan,8192,8,95,true
Scan,1048576,1,312,true
Scan,1048576,2,386,true
Scan,1048576,4,317,true
Scan,1048576,8,311,true
"""

# Build the full LaTeX document from the embedded CSV
latex_output = generate_performance_table(csv_input)

# Write the LaTeX source to stdout (shown as the cell output)
print(latex_output)

\documentclass[10pt]{article}
\usepackage{booktabs}
\usepackage{amsmath}
\usepackage[a4paper, margin=1in]{geometry}
\title{Scan Algorithm Performance}
\author{}
\date{}
\begin{document}
\maketitle

\section*{Execution Time by Problem Size and Thread Count}

This table summarizes the execution time in milliseconds (ms) for the parallel Scan algorithm, showing scaling efficiency with increasing thread counts ($P$).

\begin{table}[h!]
\centering
\caption{Scan Algorithm Performance Metrics (Execution Time in ms)}
\label{tab:scan_performance}
\begin{tabular}{@{}lcccc@{}}
\toprule
Problem Size ($N$) & \multicolumn{4}{c}{Execution Time (ms) by Number of Threads ($P$)} \\
\cmidrule(lr){2-5}
 & 1 & 2 & 4 & 8 \\
\midrule
16 & 102 & 92 & 91 & 83 \\
\midrule
64 & 92 & 95 & 90 & 87 \\
\midrule
1024 & 93 & 99 & 99 & 90 \\
\midrule
4096 & 104 & 98 & 100 & 92 \\
\midrule
8192 & 97 & 97 & 92 & 95 \\
\midrule
1048576 & 312 & 386 & 317 & 311 \\
\bottomrule
\end{tabular}
\end{table}

\end{document}
