In [9]:
import json
import os
import matplotlib.pyplot as plt
import re
from collections import defaultdict

# Configuration
folder_path = "averaged_results"  # Replace with your folder path
# Expected filename shape: "<prefix>_numThreads_<N>_average.json".
# Group 1 is the prefix (greedy, so it may itself contain underscores);
# group 2 is the thread count N.
pattern = re.compile(r"(.*)_numThreads_(\d+)_average\.json")  # Regex to parse filenames

# Data structure: {prefix: {thread_count: average_ops}}
data = defaultdict(dict)

# Read every result file in the folder and index it by (prefix, thread count).
for filename in os.listdir(folder_path):
    # fullmatch anchors both ends of the name, so only files that end exactly
    # in "_average.json" are accepted; a start-anchored match plus a separate
    # ".json" suffix test would also let names such as
    # "x_numThreads_4_average.json.bak.json" through.
    match = pattern.fullmatch(filename)
    if match is None:
        continue

    prefix = match.group(1)  # Full prefix (e.g., "aksenov_splaylist_64.debra_zipfian")
    num_threads = int(match.group(2))  # Thread count
    file_path = os.path.join(folder_path, filename)

    # JSON text is UTF-8; state it explicitly so decoding does not depend on
    # the platform's default locale encoding.
    with open(file_path, 'r', encoding="utf-8") as f:
        json_data = json.load(f)

    # Raises KeyError if the file lacks the expected field. If two files map
    # to the same (prefix, thread count), the one listed later wins.
    data[prefix][num_threads] = json_data["average_num_operations_total"]

# Generate one graph per prefix: average total operations vs. thread count.
for prefix, thread_data in data.items():
    # Sort thread counts so the line is drawn left to right, and pull the
    # matching values in the same order.
    sorted_threads = sorted(thread_data)
    avg_ops = [thread_data[t] for t in sorted_threads]

    # Create a new figure so each prefix gets its own plot
    plt.figure(figsize=(10, 6))
    plt.plot(
        sorted_threads,
        avg_ops,
        marker='o',
        linestyle='-',
        color='blue',
        markersize=8,
        linewidth=2
    )

    # Customize title and labels. Only the text after the last underscore of
    # the prefix is shown (e.g. "zipfian" from "aksenov_..._zipfian").
    # NOTE(review): prefixes that share the same last component will produce
    # identical titles (and identical filenames if the savefig line below is
    # enabled) — confirm that is acceptable for the data set in use.
    short_prefix = prefix.split("_")[-1]
    plt.xlabel("Number of Threads", fontsize=12)
    plt.ylabel("Average Total Operations", fontsize=12)
    plt.title(f"Throughput Scaling: {short_prefix}", fontsize=14)
    plt.grid(True, linestyle='--', alpha=0.7)
    # Put ticks at the thread counts that were actually measured instead of a
    # hard-coded 1-16 range, so runs with more than 16 threads (or with sparse
    # counts such as 1, 2, 4, 8, ...) are labelled correctly.
    plt.xticks(sorted_threads)

    # Optional: Save the figure (uncomment line below)
    # plt.savefig(f"{short_prefix}_throughput.png", dpi=300, bbox_inches='tight')

    plt.tight_layout()
    plt.show()


ModuleNotFoundError: No module named 'matplotlib'