# In [4]:
import re
import pandas as pd
from collections import defaultdict

def parse_experiments(text: str) -> pd.DataFrame:
    """Parse an experiment log into one averaged row per (matrix size, method).

    The log is scanned line by line:

    * a "Running experiments for matrix size: R x C" line sets the current
      matrix size;
    * an "Experiment i of n" line starts a fresh set of mask statistics;
    * an "Experimental results for <method>" line selects the method that
      the following statistic lines are attributed to.

    Every matching statistic line is stored as its own record, merged with
    the mask statistics collected so far for the current experiment. The
    records of each (matrix size, method) pair are then averaged column by
    column.

    Args:
        text: Full contents of the log.

    Returns:
        A DataFrame with the columns ``matrix_size`` and ``method``, the mean
        of every numeric statistic seen for that pair, and
        ``block_dimension`` (the first block dimension reported for the pair,
        or ``None`` when the log reports none). The frame is empty when no
        statistic line was matched.
    """
    # A plain decimal such as "12", "12.5", "12." or ".5". Unlike a bare
    # character class of digits and dots, it cannot swallow a sentence-ending
    # period ("0.95.") or match a lone ".", both of which float() rejects.
    number = r"(\d+\.?\d*|\.\d+)"

    matrix_size_re = re.compile(r"Running experiments for matrix size: (\d+ x \d+)")
    experiment_re = re.compile(r"Experiment \d+ of \d+")
    method_re = re.compile(r"Experimental results for (.+)")

    mask_stats_re = {
        "mask_total_nonzeros": re.compile(r"Total non-zeros in the mask: (\d+)"),
        "mask_sparsity": re.compile(r"Sparsity of the mask: " + number),
    }

    method_stats_re = {
        "non_empty_blocks": re.compile(r"Number of non-empty blocks: (\d+)"),
        "avg_nonzeros_per_block": re.compile(
            r"Average number of non-zeros per block: " + number
        ),
        "block_sparsity": re.compile(r"Block sparsity.*: " + number + r"%"),
        "block_dimension": re.compile(r"Block dimension (\d+x\d+): (\d+) blocks"),
    }

    # results[matrix_size][method] -> list of per-line records.
    results = defaultdict(lambda: defaultdict(list))
    current_matrix = None
    current_method = None
    current_exp_data = {}

    for line in text.splitlines():
        # Matrix-size header.
        m = matrix_size_re.search(line)
        if m:
            current_matrix = m.group(1)
            continue

        # Experiment boundary: forget the previous experiment's mask stats.
        if experiment_re.search(line):
            current_exp_data = {"matrix_size": current_matrix}
            continue

        # Mask statistics belong to the experiment, not to a single method.
        for key, regex in mask_stats_re.items():
            m = regex.search(line)
            if m:
                current_exp_data[key] = float(m.group(1))

        # Method header.
        m = method_re.search(line)
        if m:
            current_method = m.group(1).strip()
            continue

        # Statistic lines are only meaningful once a method is known.
        if not current_method:
            continue

        entry = {}
        for key, regex in method_stats_re.items():
            m = regex.search(line)
            if not m:
                continue
            if key == "block_dimension":
                entry["block_dimension"] = m.group(1)
                entry["block_count"] = int(m.group(2))
            else:
                entry[key] = float(m.group(1))

        if entry:
            # One record per matching line; keys it lacks become NaN in the
            # per-method frame below and are skipped by mean().
            results[current_matrix][current_method].append(
                {**current_exp_data, **entry}
            )

    # Aggregate: one output row per (matrix size, method).
    non_numeric = ("matrix_size", "method", "block_dimension")
    records = []
    for matrix_size, methods in results.items():
        for method, exps in methods.items():
            row = {"matrix_size": matrix_size, "method": method}
            df_exp = pd.DataFrame(exps)
            for col in df_exp.columns:
                if col not in non_numeric:
                    row[col] = df_exp[col].mean()
            # Only the records built from a "Block dimension" line carry this
            # key, so take the first one that has it rather than row 0 (which
            # is usually a different statistic); None if it never appears.
            row["block_dimension"] = next(
                (exp["block_dimension"] for exp in exps if "block_dimension" in exp),
                None,
            )
            records.append(row)

    return pd.DataFrame(records)

# Example usage: parse the log stored in "output.txt" when run as a script.
if __name__ == "__main__":
    # Read the whole log at once; parse_experiments takes a single string.
    with open("output.txt") as f:
        text = f.read()
    df = parse_experiments(text)
    # NOTE(review): bare expression with no assignment or call. Nested inside
    # this `if` it is presumably not echoed as cell output; confirm, and use
    # print()/display() if the table is meant to be shown.
    df

# In [8]:
# Write `df` to the workbook "experiment_results.xlsx", on a worksheet
# named "Results".
df.to_excel(
    excel_writer="experiment_results.xlsx",
    sheet_name="Results",
)