# Read input data

In [None]:
import re
import pprint
import os

# Define a pattern to match the relevant data.
# Each list entry describes one line of a run record; the entries are joined
# with the regex escape '\n', so a record must span eight consecutive lines.
# RE_IGNORE_OPTIONAL_LINE matches the remainder of a line whatever its content
# (possibly empty), i.e. each occurrence skips exactly one line.
RE_IGNORE_OPTIONAL_LINE = r'(?:[^\n]*)?'
pattern = [
    r'Running ([/\w]+) on (\w+)[^\n]*',                # groups 0, 1: binary, dataset
    RE_IGNORE_OPTIONAL_LINE,
    RE_IGNORE_OPTIONAL_LINE,
    r'Number of threads: (\d+)',                       # group 2
    r'Thread tot iterations: ((?:\d+ )*)',             # group 3: space-separated ints
    r'Thread tot writes: ((?:\d+ )*)',                 # group 4: space-separated ints
    r'Top-down Bottom-up switches: ((?:\d+-\d+ )*)',   # group 5: "a-b" pairs
    r'Runtime: (\d+\.[\d]+)'                           # group 6: decimal number
]
pattern = r'\n'.join(pattern)

# List all files in the 'data_balancing' directory
all_files = os.listdir('data_balancing')

# Keep only the files that start with 'balancing_'. os.listdir() returns
# entries in arbitrary order, so sort them to make the output reproducible.
files = sorted(f for f in all_files if f.startswith('balancing_'))
data = {}

for filename in files:
    # Read the text to parse from the current log file
    with open(f'data_balancing/{filename}', 'r') as file:
        txt = file.read()

    # Find all matches in the string (one tuple of captured groups per run)
    matches = re.findall(pattern, txt)

    # Key: the file name without the 'balancing_' prefix and without its
    # extension (whatever the extension length is).
    key = os.path.splitext(filename)[0][len('balancing_'):]

    # Convert matches to a list of dictionaries, one per run
    data[key] = [{
        'binary': match[0],
        'dataset': match[1],
        'threads': int(match[2]),
        'threads_niter': [int(x) for x in match[3].split()],
        'threads_writes': [int(x) for x in match[4].split()],
        # The regex guarantees every item has the form "<int>-<int>".
        'td_bu_switches': [
            tuple(int(n) for n in pair.split('-')) for pair in match[5].split()
        ],
        # Stored as a number, like every other numeric field, so it can be
        # used directly in arithmetic and plots.
        'runtime': float(match[6]),
        } for match in matches]

pprint.pprint(data)

# Generate plots on per-thread load balance (iterations and writes)

In [None]:
import matplotlib.pyplot as plt

# Generate a separate pair of figures for each algorithm.
# Each key of `data` is used as the algorithm name; its value is the list of
# parsed runs, each run being a dict with 'dataset', 'threads_niter' and
# 'threads_writes' entries.
for algorithm, datasets in data.items():
    # plt.subplots() rejects a zero-column grid, so skip entries for which
    # no run was parsed instead of aborting the whole loop.
    if not datasets:
        continue

    # Figure 1: per-thread iterations and writes, one subplot per run.
    n_cols = len(datasets)
    # squeeze=False always yields a 2-D array of axes, even with one column.
    fig, axes = plt.subplots(1, n_cols, figsize=(20, 5), squeeze=False)
    width = 0.35

    for j, (ax, dataset) in enumerate(zip(axes[0], datasets)):
        x = range(len(dataset['threads_niter']))

        # Two bars per thread, the second one shifted by one bar width.
        ax.bar(x, dataset['threads_niter'], width, label='Iterations')
        ax.bar([p + width for p in x], dataset['threads_writes'], width, label='Writes')

        ax.set_title(f"{algorithm} - {dataset['dataset']}")
        ax.set_xlabel('Thread')
        ax.set_ylabel('Count')
        # The colours are the same in every subplot; one legend is enough.
        if j == 0:
            ax.legend()

    plt.tight_layout()
    plt.show()

    # Figure 2: boxplots of the per-thread distributions, one box per run.
    fig, axes = plt.subplots(1, 2, figsize=(15, 5))
    fig.suptitle(algorithm)
    dataset_names = [dataset['dataset'] for dataset in datasets]

    panels = (
        ('threads_niter', 'Iterations per Thread', 'Iterations'),
        ('threads_writes', 'Writes per Thread', 'Writes'),
    )
    for ax, (key, title, ylabel) in zip(axes, panels):
        ax.boxplot([dataset[key] for dataset in datasets])
        # Name each box after its dataset instead of the default 1..N ticks.
        ax.set_xticklabels(dataset_names)
        ax.set_title(title)
        ax.set_xlabel('Dataset')
        ax.set_ylabel(ylabel)

    plt.tight_layout()
    plt.show()