#### Convert data from PAPI counters to CSV

In [12]:
import os
import json
import csv
import re

def _sum_l3_counters(json_path):
  """
  Sums the L3 cache counters over every thread and region of one PAPI JSON file.

  The file is expected to hold a top-level "threads" mapping whose values each
  hold a "regions" mapping; every region may carry "PAPI_L3_TCA" and
  "PAPI_L3_TCM" values (missing counters count as 0).

  Args:
    json_path (str): Path to the JSON file to read.

  Returns:
    tuple[int, int]: (total PAPI_L3_TCA, total PAPI_L3_TCM) for the file.

  Raises:
    OSError: If the file cannot be opened or read.
    ValueError: If the file is not valid JSON or a counter is not an integer.
    TypeError, AttributeError: If the JSON does not have the expected shape.
  """
  with open(json_path, 'r', encoding='utf-8') as f:
    data = json.load(f)

  tca = 0
  tcm = 0
  for thread_data in data.get("threads", {}).values():
    for region_data in thread_data.get("regions", {}).values():
      tca += int(region_data.get("PAPI_L3_TCA", 0))
      tcm += int(region_data.get("PAPI_L3_TCM", 0))
  return tca, tcm


def process_papi_files(root_dir, output_csv):
  """
  Parses PAPI JSON output files in specified subdirectories and aggregates results into a CSV file.

  Only subdirectories of `root_dir` named
  `<gapbs|openmp|pthreads>_<N>cpus_<dataset>.mtx` are considered. For each one,
  the JSON files inside its `papi_hl_output` subdirectory are tried in sorted
  order and the first file that parses cleanly supplies the totals; files that
  fail are reported and skipped. Directories without a `papi_hl_output`
  subdirectory, or without any usable JSON file, produce no row.

  If nothing was processed, a message is printed and no CSV file is written.

  Args:
    root_dir (str): The directory containing the subdirectories with PAPI data.
    output_csv (str): The path to the output CSV file.
  """
  # Regex to parse the directory name; the dot before "mtx" is escaped so that
  # only a literal ".mtx" suffix is accepted.
  dir_pattern = re.compile(r'^(gapbs|openmp|pthreads)_(\d+)cpus_(.+)\.mtx$')

  results = []

  # os.listdir returns entries in arbitrary order; sort for a reproducible CSV.
  for dir_name in sorted(os.listdir(root_dir)):
    dir_path = os.path.join(root_dir, dir_name)
    if not os.path.isdir(dir_path):
      continue

    match = dir_pattern.match(dir_name)
    if not match:
      continue

    implementation, ncpus, dataset_name = match.groups()

    papi_dir = os.path.join(dir_path, "papi_hl_output")
    if not os.path.isdir(papi_dir):
      # A run without PAPI output must not abort the whole aggregation.
      print(f"Skipping {dir_name}: no papi_hl_output directory")
      continue

    # Sorted so that "the first JSON file" is the same file on every run.
    for file_name in sorted(os.listdir(papi_dir)):
      if not file_name.endswith('.json'):
        continue

      json_path = os.path.join(papi_dir, file_name)
      try:
        # Totals are computed per file, so a file that fails halfway through
        # cannot leak partial sums into the row built from a later file.
        total_tca, total_tcm = _sum_l3_counters(json_path)
      except (OSError, ValueError, TypeError, AttributeError) as e:
        print(f"Error processing {json_path}: {e}")
        continue

      results.append({
        'implementation': implementation,
        'ncpus': ncpus,
        'dataset_name': dataset_name,
        'PAPI_L3_TCA': total_tca,
        'PAPI_L3_TCM': total_tcm
      })
      break # Process only the first usable JSON file found in the directory

  # Write results to CSV
  if not results:
    print("No data processed.")
    return

  with open(output_csv, 'w', newline='') as csvfile:
    fieldnames = ['implementation', 'ncpus', 'dataset_name', 'PAPI_L3_TCA', 'PAPI_L3_TCM']
    writer = csv.DictWriter(csvfile, fieldnames=fieldnames)

    writer.writeheader()
    writer.writerows(results)

  print(f"Successfully wrote results to {output_csv}")

# Location of the per-run PAPI directories and the CSV file to produce.
directory, output_file = '../papi', 'papi_data.csv'

# Aggregate every matching run directory under `directory` into `output_file`.
process_papi_files(root_dir=directory, output_csv=output_file)

Successfully wrote results to papi_data.csv
