In [2]:
import yaml
import pandas as pd

# Sample placement report, embedded so the cell is self-contained.
# It holds three YAML mappings whose values are all plain strings:
#   comm_name_to_numa_id_write / comm_name_to_numa_id_read:
#       "<task>-><task>"  ->  "NUMA IDs = <id> [<id> ...]"
#   exec_name_to_locality:
#       "<task>"          ->  "NUMA ID = <id>, Core ID = <id>, ..."
# The value strings are not YAML structures; they are split apart by hand
# further down.
document = """
comm_name_to_numa_id_write:
  Task1->Task3: NUMA IDs = 0 
  Task1->Task2: NUMA IDs = 0 

comm_name_to_numa_id_read:
  Task1->Task2: NUMA IDs = 0 1 
  Task1->Task3: NUMA IDs = 0 

exec_name_to_locality:
  Task2: NUMA ID = 1, Core ID = 47, Voluntary CS = 8, Involuntary CS = 25, Core Migrations = 0
  Task3: NUMA ID = 0, Core ID = 0, Voluntary CS = 1, Involuntary CS = 89, Core Migrations = 0
  Task1: NUMA ID = 0, Core ID = 0, Voluntary CS = 1, Involuntary CS = 50, Core Migrations = 0
"""

# The report contains only mappings and plain strings, so the restricted
# safe loader is sufficient; it builds nothing but standard Python types and
# stays safe if `document` is ever replaced by text read from a file.
data = yaml.safe_load(document)

# Extract relevant data
exec_name_to_locality = data["exec_name_to_locality"]
comm_name_to_numa_id_write = data["comm_name_to_numa_id_write"]
comm_name_to_numa_id_read = data["comm_name_to_numa_id_read"]

# Accumulates one list per (data item, task, memory node) access; it is
# turned into a DataFrame once all accesses have been collected.
rows = []

# Helper function to process operations
def process_access(data_item, task_name, cpu_node, mem_nodes, access_type):
    """Record one access row per memory node in the module-level ``rows`` list.

    For every element of ``mem_nodes`` the list
    ``[data_item, task_name, cpu_node, mem_node, access_type]`` is appended
    to ``rows``. An empty ``mem_nodes`` adds nothing. Returns ``None``.
    """
    rows.extend(
        [data_item, task_name, cpu_node, node, access_type]
        for node in mem_nodes
    )

# Parse every task's CPU NUMA node once, instead of re-splitting the same
# locality string for each communication. Values stay strings (e.g. "0").
cpu_node_by_task = {
    task_name: locality.split("NUMA ID = ")[1].split(",")[0].strip()
    for task_name, locality in exec_name_to_locality.items()
}

# Write and read accesses are handled identically; they differ only in which
# side of "<left>-><right>" names the accessing task and in the label stored.
# Writes are processed before reads, which fixes the row order.
access_sources = (
    (comm_name_to_numa_id_write, 0, "write"),  # task on the left writes
    (comm_name_to_numa_id_read, 1, "read"),    # task on the right reads
)

for comm_to_numa_ids, endpoint_index, access_type in access_sources:
    for comm_name, numa_ids in comm_to_numa_ids.items():
        mem_nodes = numa_ids.split("NUMA IDs = ")[1].strip().split()

        endpoints = comm_name.split("->")
        if endpoint_index >= len(endpoints):
            # The name has no such side (e.g. no "->"), so no task can match.
            continue

        # Exact-name lookup; communications whose task has no locality entry
        # contribute no rows.
        task_name = endpoints[endpoint_index]
        cpu_node = cpu_node_by_task.get(task_name)
        if cpu_node is None:
            continue

        process_access(comm_name, task_name, cpu_node, mem_nodes, access_type)

# Create a DataFrame
df = pd.DataFrame(
    rows,
    columns=["data_item", "task_name", "cpu_node", "mem_node", "access_type"],
)

# Display the DataFrame
print(df)

      data_item task_name cpu_node mem_node access_type
0  Task1->Task3     Task1        0        0       write
1  Task1->Task2     Task1        0        0       write
2  Task1->Task2     Task2        1        0        read
3  Task1->Task2     Task2        1        1        read
4  Task1->Task3     Task3        0        0        read
