# Read input data

In [None]:
import re
import pprint

# A run record in the log is three consecutive lines:
#   Running build/<binary> on <dataset>
#   Number of threads: <count>
#   Frontier sizes: <whitespace-separated integers>
# Capture groups, in order: binary path, dataset name, thread count and the
# raw frontier-size text.
pattern = r'Running (build/\w+) on (\w+)\nNumber of threads: (\d+)\nFrontier sizes: ([\d\s]+)'

# Load the whole log at once: the pattern spans several lines, so it cannot
# be applied line by line.
with open('data_datasets/frontiers.log', 'r') as file:
    file_format = file.read()

# One 4-tuple of captured strings per run found in the log
matches = re.findall(pattern, file_format)

# Build one record per run, converting the numeric fields to int
frontier_data = [
    {
        'binary': binary,
        'dataset': dataset,
        'threads': int(threads),
        'frontier_sizes': [int(size) for size in sizes.split()],
    }
    for binary, dataset, threads, sizes in matches
]

pprint.pprint(frontier_data)

# Generate plots of frontier sizes

In [None]:
import matplotlib.pyplot as plt

# Runs with at least this many recorded frontier sizes (one per plotted step)
# go to the "large-diameter" panel; all others go to the "small-diameter" one.
LARGE_DIAMETER_MIN_STEPS = 100

# Separate the data based on the size of the frontier_sizes array
small_frontier_data = [
    item for item in frontier_data
    if len(item['frontier_sizes']) < LARGE_DIAMETER_MIN_STEPS
]
large_frontier_data = [
    item for item in frontier_data
    if len(item['frontier_sizes']) >= LARGE_DIAMETER_MIN_STEPS
]


def _plot_frontier_curves(ax, items, title):
    """Draw one frontier-size line per run in *items* on *ax*.

    Each line is labelled with the run's dataset name. The x axis is the
    position within 'frontier_sizes' (the step), the y axis the frontier size.
    """
    for item in items:
        ax.plot(item['frontier_sizes'], label=item['dataset'])
    ax.set_title(title)
    ax.set_xlabel('Step')
    ax.set_ylabel('Frontier Size')
    # Only request a legend when something labelled was drawn; calling
    # legend() on an axes without labelled artists makes matplotlib warn.
    if items:
        ax.legend()


# Two side-by-side panels: small-diameter runs on the left, large on the right
fig, axes = plt.subplots(nrows=1, ncols=2, figsize=(10, 3))
_plot_frontier_curves(axes[0], small_frontier_data, 'Small-diameter Graphs')
_plot_frontier_curves(axes[1], large_frontier_data, 'Large-diameter Graphs')

plt.tight_layout()
plt.show()

In [None]:
def _boxplot_frontier_sizes(ax, items, title):
    """Draw one box per run in *items* on *ax*, labelled by dataset name.

    Each box summarises the distribution of that run's 'frontier_sizes'.
    """
    # An empty group has nothing to draw; skip the call so matplotlib is not
    # handed an empty dataset together with an empty label list. The panel
    # is still titled and labelled below so the figure layout stays the same.
    if items:
        ax.boxplot(
            [item['frontier_sizes'] for item in items],
            tick_labels=[item['dataset'] for item in items],
        )
    ax.set_title(title)
    ax.set_xlabel('Dataset')
    ax.set_ylabel('Frontier Size')


# Two side-by-side panels: small-diameter runs on the left, large on the right
fig, axes = plt.subplots(nrows=1, ncols=2, figsize=(15, 5))
_boxplot_frontier_sizes(axes[0], small_frontier_data, 'Small-diameter Graphs')
_boxplot_frontier_sizes(axes[1], large_frontier_data, 'Large-diameter Graphs')

plt.tight_layout()
plt.show()