In [None]:
from collections import defaultdict
import re
import os

def extract_timing_summary(filepath, table_amount):
    """Collect the entries of the first ``table_amount`` timing tables in a log.

    A table starts at a line containing ``--- Timing Summary ---`` and ends at
    the next line whose stripped text starts with ``---``. The header of a
    following table also closes the current one, so back-to-back tables
    without an explicit terminator are still counted separately.

    Inside a table, lines shaped like ``Label: value [unit]`` are recorded;
    other lines are ignored.

    Args:
        filepath: Path of the text file to scan.
        table_amount: Maximum number of tables to read. Values of 0 or less
            yield an empty result.

    Returns:
        A dict mapping ``"<label> <table index>"`` (index is 0-based) to
        ``{'value': float, 'unit': str}``. The unit is one of ``ms``, ``s``,
        ``us`` and defaults to ``ms`` when the line carries none.
    """
    header = '--- Timing Summary ---'
    # Match lines like "Label: value unit"
    entry_re = re.compile(r'(.+?):\s+([\d.]+)\s*(ms|s|us)?')

    summary = {}
    count = 0  # number of tables fully read so far
    in_summary = False

    with open(filepath, 'r') as file:
        for line in file:
            if count >= table_amount:
                break  # We have read the defined amount of tables

            if header in line:
                if in_summary:
                    # The previous table had no terminator line; the new
                    # header closes it so its entries keep their own index.
                    count += 1
                    if count >= table_amount:
                        break
                in_summary = True
                continue

            if not in_summary:
                continue

            if line.strip().startswith('---'):
                # End of timing summary
                in_summary = False
                count += 1
                continue

            match = entry_re.match(line)
            if match:
                label = match.group(1).strip() + " " + str(count)
                value = float(match.group(2))
                unit = match.group(3) or 'ms'  # default to ms if missing
                summary[label] = {'value': value, 'unit': unit}

    return summary

def extract_all_timing_summaries(filepath):
    """Return every timing table found in a log file, one dict per table.

    A table opens at a line containing ``--- Timing Summary ---`` and closes
    at the first blank line, at a line whose stripped text starts with
    ``---``, at the next table header, or at end of file. Tables without any
    parsed entry are not reported.

    Args:
        filepath: Path of the text file to scan.

    Returns:
        A list of dicts, in file order, each mapping a label to
        ``{'value': float, 'unit': str}``. The unit defaults to ``ms`` when
        the line carries none.
    """
    header = '--- Timing Summary ---'
    entry_pattern = re.compile(r'(.+?):\s+([\d.]+)\s*(ms|s|us)?')

    collected = []
    block = {}
    inside = False

    with open(filepath, 'r') as handle:
        for raw in handle:
            if header in raw:
                # A new header also finishes whatever table was still open.
                if block:
                    collected.append(block)
                    block = {}
                inside = True
                continue

            if not inside:
                continue

            stripped = raw.strip()
            if not stripped or stripped.startswith('---'):
                if block:
                    collected.append(block)
                    block = {}
                inside = False
                continue

            hit = entry_pattern.match(raw)
            if hit is None:
                continue
            name, number, unit = hit.groups()
            block[name.strip()] = {'value': float(number), 'unit': unit or 'ms'}

    # Catch the last block if the file ends right after it
    if block:
        collected.append(block)

    return collected
def average_timing_summaries(summaries):
    """Average each label's timing across several summaries, in milliseconds.

    Every entry is converted to milliseconds using its ``'unit'`` field
    before it is accumulated, so summaries that mix ``s``, ``ms`` and ``us``
    are averaged on a common scale. A label that is absent from some
    summaries is averaged only over the summaries that contain it.

    Args:
        summaries: Iterable of dicts mapping a label to
            ``{'value': number, 'unit': str}``. A missing or unrecognised
            unit is treated as milliseconds.

    Returns:
        A dict mapping each label to its mean value in milliseconds. Empty
        input yields an empty dict.
    """
    to_ms = {'s': 1000.0, 'ms': 1.0, 'us': 0.001}
    totals = defaultdict(float)
    counts = defaultdict(int)

    for summary in summaries:
        for key, entry in summary.items():
            # Unknown units are left unscaled rather than raising, so one
            # odd entry does not abort the whole report.
            factor = to_ms.get(entry.get('unit', 'ms'), 1.0)
            totals[key] += entry['value'] * factor
            counts[key] += 1

    return {key: totals[key] / counts[key] for key in totals}

# Locate the benchmark logs relative to the current working directory.
cwd = os.getcwd()
path = f"{cwd}/GPU/out/out_gpu.txt"
path_literal = f"{cwd}/GPU/out/out_gpu_literal.txt"  # not used further in this cell

# Index-labelled entries from the leading timing tables of the GPU log.
timings = extract_timing_summary(path, 5)

# Every timing table of the same log, one dict per table.
summaries = extract_all_timing_summaries(path)

if summaries:
    averages = average_timing_summaries(summaries)
    print("\n--- Averaged Timing Summary (over {} runs) ---".format(len(summaries)))
    # Labels are printed in alphabetical order.
    for label in sorted(averages):
        avg = averages[label]
        print(f"{label}: {avg:.3f} ms")
else:
    print("No timing summaries found.")

--- Extracted Timing Summary ---

--- Averaged Timing Summary (over 5 runs) ---
Data transfer to GPU: 11.200 ms
Device memory alloc: 331.200 ms
Host preprocessing: 5309.000 ms
Kernel execution: 0.000 ms
Post GPU processing: 0.000 ms
Regex pattern matching: 66.000 ms
Total runtime: 5754.800 ms
