In [13]:
from IPython.display import Markdown, display
import re

# Runtime line, e.g. "Finished in (ms): 97001"; a fractional part is accepted too.
_RUNTIME_MS_RE = re.compile(r"Finished in \(ms\): (\d+(?:\.\d+)?)")

# Throughput line, e.g. "Basecalled @ Samples/s: 4.164530e+05".  The pattern only
# matches a well-formed decimal / scientific-notation number, so trailing
# punctuation or a stray sign is never handed to float().
_THROUGHPUT_RE = re.compile(
    r"Basecalled @ Samples/s: ([-+]?(?:\d+(?:\.\d*)?|\.\d+)(?:[eE][-+]?\d+)?)"
)


def _parse_log(lines):
    """Scan log lines and return ``(runtime_seconds, throughput)``; ``None`` marks a value not found."""
    total_runtime = None
    throughput = None

    for line in lines:
        # No early exit: when a line type appears several times, the last one wins.
        match = _RUNTIME_MS_RE.search(line)
        if match:
            total_runtime = float(match.group(1)) / 1000  # Convert ms to seconds

        match = _THROUGHPUT_RE.search(line)
        if match:
            throughput = float(match.group(1))

    return total_runtime, throughput


def _markdown_table(summary):
    """Render the summary entries as a Markdown table string (one row per entry)."""
    rows = [
        "| Model | Total Runtime (s) | Throughput (Samples/s) |",
        "|-------|-------------------|-------------------------|",
    ]
    for entry in summary:
        # A literal pipe in the model name would otherwise split the table cell.
        model = str(entry['Model']).replace("|", "\\|")
        rows.append(
            f"| {model} | {entry['Total Runtime (s)']:.3f} | {entry['Throughput (Samples/s)']:.6e} |"
        )
    return "\n".join(rows) + "\n"


def extract(target):
    """Summarise runtime and throughput from a set of log files as a Markdown table.

    Each log is searched for a ``Finished in (ms): <number>`` line and a
    ``Basecalled @ Samples/s: <number>`` line.  Models for which both values
    are found get one table row; the table is rendered with
    ``display(Markdown(...))``.

    Parameters
    ----------
    target : dict
        Maps a model name (used as the row label) to the path of its log file.

    Returns
    -------
    None
        The table is displayed, not returned.

    Notes
    -----
    A model is skipped with a printed warning (instead of aborting the whole
    summary) when its log file does not exist or when either value is missing
    from the log.  Undecodable bytes in a log are replaced rather than raising.
    """
    summary = []

    for model, log_path in target.items():
        try:
            with open(log_path, 'r', encoding='utf-8', errors='replace') as file:
                total_runtime, throughput = _parse_log(file)
        except FileNotFoundError:
            print(f"Warning: Log file not found for model {model}: {log_path}")
            continue

        if total_runtime is not None and throughput is not None:
            summary.append({
                'Model': model,
                'Total Runtime (s)': total_runtime,
                'Throughput (Samples/s)': throughput
            })
        else:
            print(f"Warning: Missing data for model {model} in log {log_path}")

    display(Markdown(_markdown_table(summary)))


# Base models

In [15]:
# Base models: map each model name to the log path under ../models/<name>_test/.
# NOTE(review): every model reads a file named "sup.log" — confirm the log
# file name is not meant to vary with the model.
target = {
    m: f"../models/{m}_test/sup.log"
    for m in ["sup", "frank", "je"]
}
extract(target)

| Model | Total Runtime (s) | Throughput (Samples/s) |
|-------|-------------------|-------------------------|
| sup | 97.001 | 4.164530e+05 |
| frank | 252.978 | 1.596833e+05 |
| je | 264.553 | 1.526967e+05 |


# One-shot models

In [16]:
# One-shot models: s takes the values 1 - 0.75**a for a = 1..8
# (0.25, 0.4375, ...); the float's text form is embedded in both the row
# label and the directory name.
target = {
    f"sparse_{s}_retrained": f"one_shot/sparse_{s}_retrained/run.log"
    for s in (1-0.75**a for a in range(1, 9))
}
extract(target)

| Model | Total Runtime (s) | Throughput (Samples/s) |
|-------|-------------------|-------------------------|
| sparse_0.25_retrained | 258.739 | 1.561278e+05 |
| sparse_0.4375_retrained | 254.481 | 1.587402e+05 |
| sparse_0.578125_retrained | 258.976 | 1.559850e+05 |
| sparse_0.68359375_retrained | 219.840 | 1.837535e+05 |
| sparse_0.7626953125_retrained | 68.135 | 5.928870e+05 |
| sparse_0.822021484375_retrained | 7.415 | 5.447924e+06 |
| sparse_0.86651611328125_retrained | 7.816 | 5.168419e+06 |
| sparse_0.8998870849609375_retrained | 7.615 | 5.304840e+06 |


# Iterative models

In [18]:
# Iterative models: one log per iteration index s = 1..8 under iterative_0.25/.
target = {
    f"iter_{s}_retrained": f"iterative_0.25/iter_{s}_retrained/run.log"
    for s in range(1, 9)
}
extract(target)

| Model | Total Runtime (s) | Throughput (Samples/s) |
|-------|-------------------|-------------------------|
| iter_1_retrained | 247.560 | 1.631781e+05 |
| iter_2_retrained | 257.383 | 1.569504e+05 |
| iter_3_retrained | 257.693 | 1.567616e+05 |
| iter_4_retrained | 255.709 | 1.579779e+05 |
| iter_5_retrained | 229.840 | 1.757586e+05 |
| iter_6_retrained | 186.042 | 2.171357e+05 |
| iter_7_retrained | 90.042 | 4.486391e+05 |
| iter_8_retrained | 29.459 | 1.371274e+06 |
