In [2]:
import re
import pandas as pd

def parse_experiments_v3(text: str) -> pd.DataFrame:
    """Parse an experiment log into a wide table with one row per matrix size.

    The log is scanned line by line:

    * ``Matrix size: R x C`` sets the current matrix size (stored as ``"RxC"``).
    * ``Experimental results for <method>`` starts a new entry for that method
      under the current matrix size.
    * Stat lines (non-empty blocks, average non-zeros per block, block
      sparsity, block dimension) fill in the entry in progress.

    As soon as an entry holds all three numeric stats it is recorded and a
    fresh entry is started for the same size/method, so several stat groups
    under one method header each become their own record. Entries that never
    receive all three numeric stats are discarded.

    Records are then averaged per ``(matrix_size, method)`` and pivoted so
    that each method/stat pair becomes a column named ``"<method>_<stat>"``.

    Args:
        text: Full contents of the experiment log.

    Returns:
        A DataFrame with a ``matrix_size`` column plus one column per
        method/stat combination. If no complete experiment is found, an
        empty DataFrame containing only the ``matrix_size`` column.
    """
    # Regex patterns
    matrix_re = re.compile(r"Matrix size: (\d+) x (\d+)")
    method_re = re.compile(r"Experimental results for (.+)")

    method_stats_re = {
        "non_empty_blocks": re.compile(r"Number of non-empty blocks: (\d+)"),
        "avg_nonzeros_per_block": re.compile(r"Average number of non-zeros per block: ([\d\.]+)"),
        "block_sparsity": re.compile(r"Block sparsity.*: ([\d\.]+)%"),
        "block_dimension": re.compile(r"Block dimension (\d+x\d+)"),
    }
    # An entry is complete once all of these have been seen.
    required_stats = ("non_empty_blocks", "avg_nonzeros_per_block", "block_sparsity")

    records = []
    current_size = None
    current_method = None
    current_entry = {}

    for line in text.splitlines():
        # Detect matrix size
        m = matrix_re.search(line)
        if m:
            current_size = f"{m.group(1)}x{m.group(2)}"
            continue

        # Detect method
        m = method_re.search(line)
        if m:
            current_method = m.group(1).strip()
            current_entry = {"matrix_size": current_size, "method": current_method}
            continue

        # Stat lines are only meaningful once a method header has been seen.
        if not current_method:
            continue

        for key, regex in method_stats_re.items():
            m = regex.search(line)
            if m:
                value = m.group(1)
                if key != "block_dimension":  # numeric
                    value = float(value)
                current_entry[key] = value

        # Finalize the entry once every required stat is present.
        if all(k in current_entry for k in required_stats):
            records.append(current_entry)
            current_entry = {"matrix_size": current_size, "method": current_method}  # reset for next block

    # Without this guard an empty record list yields a column-less frame and
    # the groupby below fails with KeyError on "matrix_size".
    if not records:
        return pd.DataFrame(columns=["matrix_size"])

    df = pd.DataFrame(records)

    # --- Aggregate: average across experiments with same matrix_size & method ---
    # numeric_only=True drops the string-valued block_dimension column, so it
    # does not appear in the result.
    agg_df = df.groupby(["matrix_size", "method"]).mean(numeric_only=True).reset_index()

    # --- Pivot: rows = matrix_size, columns = method+stat ---
    final_df = agg_df.pivot(index="matrix_size", columns="method")

    # Flatten MultiIndex columns; pivot yields (stat, method) tuples.
    final_df.columns = [f"{method}_{stat}" for stat, method in final_df.columns]
    final_df = final_df.reset_index()

    return final_df



# Read the raw experiment log from disk and turn it into the summary table.
with open("output.txt") as log_file:
    text = log_file.read()
df = parse_experiments_v3(text)



In [4]:
# Bare expression: the notebook renders `df` as this cell's output.
# NOTE(review): the output recorded below has long-format columns (method,
# experiment_id, block_size, ...) that parse_experiments_v3 as defined in this
# notebook would not produce — presumably `df` was rebound by a cell that is
# not shown (execution counts jump 2 -> 4 -> 9). Re-run on a fresh kernel to confirm.
df

Unnamed: 0,method,experiment_id,experiment_total,matrix_size,block_size,mask_total_nonzeros,mask_sparsity,non_empty_blocks,avg_nonzeros_per_block,block_sparsity
0,Baseline,1,2,4096x4096,16x16,136487.0,0.008135,,,
1,Baseline,1,2,4096x4096,16x16,136487.0,0.008135,,,
2,Baseline,1,2,4096x4096,16x16,136487.0,0.008135,136487.0,,
3,Baseline,1,2,4096x4096,16x16,136487.0,0.008135,,7.05541,
4,Baseline,1,2,4096x4096,16x16,136487.0,0.008135,,,29.4104
...,...,...,...,...,...,...,...,...,...,...
398,Rownet_HyperGraph_Partitioning + Hypergraph_Pa...,2,2,16384x16384,16x16,629798.0,0.002346,,,
399,Rownet_HyperGraph_Partitioning + Hypergraph_Pa...,2,2,16384x16384,16x16,629798.0,0.002346,629798.0,,
400,Rownet_HyperGraph_Partitioning + Hypergraph_Pa...,2,2,16384x16384,16x16,629798.0,0.002346,,6.58661,
401,Rownet_HyperGraph_Partitioning + Hypergraph_Pa...,2,2,16384x16384,16x16,629798.0,0.002346,,,9.1105


In [9]:
# Write `df` to an Excel workbook in the working directory, on a sheet named
# "Results". `index` is not passed, so pandas' default applies and the row
# index is written as the first column.
df.to_excel("experiment_results.xlsx", sheet_name="Results")