In [8]:
import re
import pandas as pd

def parse_experiments_v3(text: str) -> pd.DataFrame:
    """Parse an experiment log into a wide table of per-method mean statistics.

    The log is scanned line by line:

    * ``Matrix size: R x C`` sets the current matrix size (stored as ``"RxC"``).
    * ``Experimental results for <method>`` starts a new method section.
    * Stat lines inside a method section are collected into a record. A record
      is emitted as soon as ``non_empty_blocks``, ``avg_nonzeros_per_block``
      and ``block_sparsity`` have all been seen; later stat lines under the
      same method header start another record for that method.

    Records sharing the same matrix size and method are averaged, then pivoted
    so each row is a matrix size and each column is ``<method>_<stat>``.

    Args:
        text: Full contents of the experiment log.

    Returns:
        A DataFrame with a ``matrix_size`` column followed by one
        ``<method>_<stat>`` column per (stat, method) pair, sorted by numeric
        (rows, cols). If the log holds no complete record with a known matrix
        size, an empty DataFrame with only a ``matrix_size`` column is returned.

    Raises:
        ValueError: If a numeric stat matches its pattern but is not a valid
            float (e.g. ``"1.2.3"``).
    """
    # Regex patterns
    matrix_re = re.compile(r"Matrix size: (\d+) x (\d+)")
    method_re = re.compile(r"Experimental results for (.+)")

    method_stats_re = {
        "non_empty_blocks": re.compile(r"Number of non-empty blocks: (\d+)"),
        "avg_nonzeros_per_block": re.compile(r"Average number of non-zeros per block: ([\d\.]+)"),
        "block_sparsity": re.compile(r"Block sparsity.*: ([\d\.]+)%"),
        "block_dimension": re.compile(r"Block dimension (\d+x\d+)"),
    }
    # A record is complete once all of these have been seen. block_dimension is
    # kept on the record when it shows up in time, but it is non-numeric and is
    # therefore dropped by the mean aggregation below.
    required_stats = ("non_empty_blocks", "avg_nonzeros_per_block", "block_sparsity")

    records = []
    current_size = None
    current_method = None
    current_entry = {}

    for line in text.splitlines():
        # Detect matrix size
        m = matrix_re.search(line)
        if m:
            # NOTE(review): the active method and any partially filled entry are
            # left untouched here, so stat lines appearing before the next method
            # header are still attributed to the previous method/size - confirm
            # this matches the log layout.
            current_size = f"{m.group(1)}x{m.group(2)}"
            continue

        # Detect method
        m = method_re.search(line)
        if m:
            current_method = m.group(1).strip()
            # Starting a new section discards any incomplete entry.
            current_entry = {"matrix_size": current_size, "method": current_method}
            continue

        # Stat lines only count once a method header has been seen.
        if not current_method:
            continue

        for key, regex in method_stats_re.items():
            m = regex.search(line)
            if m:
                value = m.group(1)
                if key != "block_dimension":  # numeric fields
                    value = float(value)
                current_entry[key] = value

        # Save completed entry and start a fresh one for the same method.
        if all(k in current_entry for k in required_stats):
            records.append(current_entry)
            current_entry = {"matrix_size": current_size, "method": current_method}

    df = pd.DataFrame(records)

    # Nothing usable was parsed: without this guard the groupby below raises
    # KeyError because the frame has no "matrix_size"/"method" columns. Records
    # with no matrix size would be dropped by groupby anyway.
    if df.empty or df["matrix_size"].isna().all():
        return pd.DataFrame(columns=["matrix_size"])

    # --- Aggregate: mean across experiments with same matrix_size & method ---
    agg_df = df.groupby(["matrix_size", "method"]).mean(numeric_only=True).reset_index()

    # --- Pivot: rows = matrix_size, columns = method+stat ---
    final_df = agg_df.pivot(index="matrix_size", columns="method")
    # Pivot yields (stat, method) column tuples; flatten them to "<method>_<stat>".
    final_df.columns = [f"{method}_{stat}" for stat, method in final_df.columns]
    final_df = final_df.reset_index()

    # --- Sort by numeric matrix size (rows first, then columns) ---
    def _dims(size: str):
        rows, cols = size.split("x")
        return int(rows), int(cols)

    order = sorted(
        range(len(final_df)),
        key=lambda pos: _dims(final_df["matrix_size"].iloc[pos]),
    )
    return final_df.iloc[order].reset_index(drop=True)



# Read the raw experiment log from disk, then build the summary table from it.
with open("output.txt") as log_file:
    text = log_file.read()

df = parse_experiments_v3(text)



In [9]:
# Show the parsed summary table: one row per matrix size, one column per method/stat pair.
df

Unnamed: 0,matrix_size,Baseline_non_empty_blocks,Hypergraph_Partitioning_Nonzero_Block_Minimization_non_empty_blocks,Hypergraph_Partitioning_Single_Border_non_empty_blocks,Hypergraph_Partitioning_Single_Border + Hypergraph_Partitioning_Nonzero_Block_Minimization_non_empty_blocks,Hypergraph_Partitioning_Two_Constraint_non_empty_blocks,RCM_non_empty_blocks,RCM + Hypergraph_Partitioning_Nonzero_Block_Minimization_non_empty_blocks,Rownet_HyperGraph_Partitioning_non_empty_blocks,Rownet_HyperGraph_Partitioning + Hypergraph_Partitioning_Nonzero_Block_Minimization_non_empty_blocks,...,Rownet_HyperGraph_Partitioning + Hypergraph_Partitioning_Nonzero_Block_Minimization_avg_nonzeros_per_block,Baseline_block_sparsity,Hypergraph_Partitioning_Nonzero_Block_Minimization_block_sparsity,Hypergraph_Partitioning_Single_Border_block_sparsity,Hypergraph_Partitioning_Single_Border + Hypergraph_Partitioning_Nonzero_Block_Minimization_block_sparsity,Hypergraph_Partitioning_Two_Constraint_block_sparsity,RCM_block_sparsity,RCM + Hypergraph_Partitioning_Nonzero_Block_Minimization_block_sparsity,Rownet_HyperGraph_Partitioning_block_sparsity,Rownet_HyperGraph_Partitioning + Hypergraph_Partitioning_Nonzero_Block_Minimization_block_sparsity
0,4096x4096,136499.5,136499.5,136499.5,136499.5,136499.5,136499.5,136499.5,136499.5,136499.5,...,8.64301,29.44615,27.42715,29.76695,23.9776,35.55245,39.2651,34.4989,29.6651,24.0407
1,8192x8192,294138.0,294138.0,294138.0,294138.0,294138.0,294138.0,294138.0,294138.0,294138.0,...,7.48399,18.9756,17.7107,18.856,16.2908,22.18515,23.8303,21.1251,18.8328,14.9653
2,16384x16384,629885.5,629885.5,629885.5,629885.5,629885.5,629885.5,629885.5,629885.5,629885.5,...,6.61556,11.27275,10.4855,11.09495,9.118505,12.47865,13.54535,12.27755,11.08625,9.072055


In [10]:
# Export the summary table to Excel. The frame's index is just a 0..n-1 counter
# left by reset_index(drop=True), so it is omitted rather than written as an
# unnamed leading column.
df.to_excel("experiment_results.xlsx", sheet_name="Results", index=False)