# Compact Profiling Report (data access pattern)

In [1]:
import yaml
import pandas as pd
import numpy as np

# Inline profiling report in YAML form. It provides:
#   - distance_latency_ns: node-to-node latency matrix
#   - numa_mappings_write / numa_mappings_read: per communication
#     ("<producer>-><consumer>"), the NUMA nodes holding the data
#   - name_to_thread_locality: per task, the NUMA node / core it ran on
#     plus context-switch and migration counters
document = """
distance_latency_ns:
  - [10, 10]
  - [10, 10]

numa_mappings_write:
  Task1->Task3: {numa_ids: [0]}
  Task1->Task2: {numa_ids: [0]}

numa_mappings_read:
  Task1->Task2: {numa_ids: [0, 1]}
  Task1->Task3: {numa_ids: [0, 1]}

name_to_thread_locality:
  Task2: {numa_id: 0, core_id: 0, voluntary_cs: 5, involuntary_cs: 126, core_migrations: 0}
  Task3: {numa_id: 1, core_id: 47, voluntary_cs: 1, involuntary_cs: 19, core_migrations: 0}
  Task1: {numa_id: 0, core_id: 0, voluntary_cs: 0, involuntary_cs: 68, core_migrations: 0}
"""

# Parse the report into plain Python containers (dicts / lists).
data = yaml.load(document, Loader=yaml.FullLoader)

# Pull out the three sections the access table is built from.
name_to_thread_locality, numa_mappings_write, numa_mappings_read = (
    data[section]
    for section in (
        "name_to_thread_locality",
        "numa_mappings_write",
        "numa_mappings_read",
    )
)

# Accumulator for the DataFrame rows; re-created on every run of this cell.
rows = []

# Helper function to process operations
def process_access(data_item, task_name, cpu_node, mem_nodes, core_id, access_type, out=None):
    """
    Record one access row per memory node touched by a task.

    Parameters:
        data_item: Name of the communication / data item being accessed.
        task_name: Name of the task performing the access.
        cpu_node: NUMA node the task runs on.
        mem_nodes (iterable): NUMA nodes holding the data item.
        core_id: Core the task runs on.
        access_type (str): Kind of access, e.g. "write" or "read".
        out (list or None): List receiving the rows. When None (default),
            rows are appended to the notebook-level ``rows`` list, which
            is the original behavior.

    Returns:
        None. Each appended row is the list
        [data_item, task_name, cpu_node, mem_node, core_id, access_type].
    """
    # Only fall back to the global accumulator when no explicit target is
    # given, so the helper can be used (and tested) without hidden state.
    target = rows if out is None else out
    target.extend(
        [data_item, task_name, cpu_node, mem_node, core_id, access_type]
        for mem_node in mem_nodes
    )

# Build the access table from both mapping sections.
# A communication is named "<producer>-><consumer>": the task on the left of
# "->" writes the data item, the task on the right reads it. Instead of
# scanning every task for every communication, the relevant endpoint is
# parsed from the name and looked up directly.
for access_type, numa_mappings, endpoint_index in (
    ("write", numa_mappings_write, 0),  # producer side
    ("read", numa_mappings_read, 1),    # consumer side
):
    for comm_name, mapping in numa_mappings.items():
        endpoints = comm_name.split("->")
        if endpoint_index >= len(endpoints):
            # Name has no such endpoint (e.g. missing "->"); nothing to record.
            continue
        task_name = endpoints[endpoint_index]
        locality = name_to_thread_locality.get(task_name)
        if locality is None:
            # No locality information was recorded for this task.
            continue
        process_access(
            comm_name,
            task_name,
            locality["numa_id"],
            mapping["numa_ids"],
            locality["core_id"],
            access_type,
        )

# Create a DataFrame with one row per (data item, task, memory node) access
df = pd.DataFrame(rows, columns=["data_item", "task_name", "cpu_node", "mem_node", "core_id", "access_type"])

# Display the DataFrame (column order chosen for readability)
df[["task_name", "core_id", "cpu_node", "mem_node", "data_item", "access_type"]]

Unnamed: 0,task_name,core_id,cpu_node,mem_node,data_item,access_type
0,Task1,0,0,0,Task1->Task3,write
1,Task1,0,0,0,Task1->Task2,write
2,Task2,0,0,0,Task1->Task2,read
3,Task2,0,0,1,Task1->Task2,read
4,Task3,47,1,0,Task1->Task3,read
5,Task3,47,1,1,Task1->Task3,read


# Matrix of local/remote accesses

In [2]:
# Function to aggregate based on equal or different cpu_node and mem_node
def aggregate_data(df, equal=True):
    """
    Count accesses per (cpu_node, mem_node) pair, local or remote only.

    Parameters:
        df (pd.DataFrame): Access table with 'cpu_node' and 'mem_node' columns.
        equal (bool): If truthy, keep rows where cpu_node == mem_node;
            otherwise keep rows where cpu_node != mem_node.

    Returns:
        pd.DataFrame: Columns 'cpu_node', 'mem_node' and 'count', one row
        per node pair present in the selected rows.
    """
    cpu_nodes, mem_nodes = df['cpu_node'], df['mem_node']
    selector = (cpu_nodes == mem_nodes) if equal else (cpu_nodes != mem_nodes)

    pair_counts = df[selector].groupby(['cpu_node', 'mem_node']).size()
    return pair_counts.reset_index(name='count')

# Local accesses: the task's CPU node is also the memory node of the data.
equal_aggregated_df = aggregate_data(df, equal=True)
# Remote accesses: the data lives on a different node than the task.
different_aggregated_df = aggregate_data(df, equal=False)

print("Aggregated Data (cpu_node == mem_node), Local accesses:", equal_aggregated_df, sep="\n")
print("\nAggregated Data (cpu_node != mem_node), Remote accesses:", different_aggregated_df, sep="\n")

Aggregated Data (cpu_node == mem_node), Local accesses:
   cpu_node  mem_node  count
0         0         0      3
1         1         1      1

Aggregated Data (cpu_node != mem_node), Remote accesses:
   cpu_node  mem_node  count
0         0         1      1
1         1         0      1


In [3]:
def create_aggregation_matrix(df, equal=None):
    """
    Creates a matrix aggregating access counts based on cpu_node vs mem_node.

    The input DataFrame is left untouched: the integer conversion of the node
    columns is done on a copy.

    Parameters:
        df (pd.DataFrame): The input DataFrame containing 'cpu_node',
            'mem_node' and 'task_name' columns. The node columns must be
            convertible to int.
        equal (bool or None):
            - If True, aggregate where cpu_node == mem_node.
            - If False, aggregate where cpu_node != mem_node.
            - If None, aggregate all rows regardless of equality.

    Returns:
        pd.DataFrame: A matrix of counts with cpu_node as index and mem_node
        as columns. Only nodes present in the selected rows appear; missing
        (cpu_node, mem_node) combinations among them are filled with 0.
    """
    # Work on a copy with integer node ids so the caller's frame keeps its
    # original dtypes (assign returns a new DataFrame).
    accesses = df.assign(
        cpu_node=df['cpu_node'].astype(int),
        mem_node=df['mem_node'].astype(int),
    )

    # Apply filtering based on the equality parameter
    if equal is True:
        accesses = accesses[accesses['cpu_node'] == accesses['mem_node']]
    elif equal is False:
        accesses = accesses[accesses['cpu_node'] != accesses['mem_node']]
    # any other value (documented: None) keeps all rows

    # Pivot into the cpu_node x mem_node matrix; counting 'task_name'
    # counts the rows of each node pair.
    return accesses.pivot_table(
        index='cpu_node',
        columns='mem_node',
        values='task_name',
        aggfunc='count',
        fill_value=0,
    )

# Local accesses: cpu_node == mem_node
matrix_local_accesses_equal = create_aggregation_matrix(df, equal=True)
print("Aggregated Data (cpu_node == mem_node), Local accesses:")
print(matrix_local_accesses_equal)

# Remote accesses: cpu_node != mem_node
matrix_remote_accesses_different = create_aggregation_matrix(df, equal=False)
print("\nAggregated Data (cpu_node != mem_node), Remote accesses:")
print(matrix_remote_accesses_different)

# All accesses: equal=None applies no filter.
# NOTE(review): the label below says "cpu_node != mem_node" although no
# filter is applied here; the text is kept as-is so it matches the recorded
# output of this cell.
matrix_total_accesses_none = create_aggregation_matrix(df, equal=None)
print("\nAggregated Data (cpu_node != mem_node), Total accesses:")
print(matrix_total_accesses_none)

# Total number of accesses, used to normalize both matrices below.
total_accesses = matrix_total_accesses_none.values.sum()

# Share of all accesses that are local, per node pair.
# NOTE(review): labelled "(%)" but the values are fractions in [0, 1].
print("\nAggregated Data (cpu_node == mem_node), Local accesses, NUMA locality (%):")
print(matrix_local_accesses_equal / total_accesses)

# Share of all accesses that are remote, per node pair (the remote matrix
# computed above is reused instead of being rebuilt).
print("\nAggregated Data (cpu_node != mem_node), Remote accesses (%):")
print(matrix_remote_accesses_different / total_accesses)

Aggregated Data (cpu_node == mem_node), Local accesses:
mem_node  0  1
cpu_node      
0         3  0
1         0  1

Aggregated Data (cpu_node != mem_node), Remote accesses:
mem_node  0  1
cpu_node      
0         0  1
1         1  0

Aggregated Data (cpu_node != mem_node), Total accesses:
mem_node  0  1
cpu_node      
0         3  1
1         1  1

Aggregated Data (cpu_node == mem_node), Local accesses, NUMA locality (%):
mem_node    0         1
cpu_node               
0         0.5  0.000000
1         0.0  0.166667

Aggregated Data (cpu_node != mem_node), Remote accesses (%):
mem_node         0         1
cpu_node                    
0         0.000000  0.166667
1         0.166667  0.000000


In [4]:
# Local accesses as a fraction of all accesses (rich display of the matrix).
matrix_local_accesses_equal.div(matrix_total_accesses_none.values.sum())

mem_node,0,1
cpu_node,Unnamed: 1_level_1,Unnamed: 2_level_1
0,0.5,0.0
1,0.0,0.166667


In [5]:
# Remote accesses as a fraction of all accesses (rich display of the matrix).
matrix_remote_accesses_different.div(matrix_total_accesses_none.values.sum())

mem_node,0,1
cpu_node,Unnamed: 1_level_1,Unnamed: 2_level_1
0,0.0,0.166667
1,0.166667,0.0


# NUMA Factor 

The NUMA factor is the ratio of remote memory access latency to local memory access latency; a value of 1.0 means remote accesses are as fast as local ones.

In [6]:
def compute_numa_factor_with_pandas(data, key='distance_latency_ns'):
    """
    Reads a YAML file with a matrix directly under the key (e.g., "NUMALatency"),
    and computes the average NUMA factor (ratio of remote to local memory access latency)
    for matrices of size n x n.

    :param data: the YAML file containing the matrix.
    :return: Average NUMA factor.
    """
    matrix = data[key]

    # Convert the matrix into a Pandas DataFrame
    df = pd.DataFrame(matrix)

    # Extract the diagonal (local latencies)
    local_latencies = pd.Series(df.values.diagonal())

    # Compute the remote-to-local latency ratios
    ratios = []
    for i in range(df.shape[0]):  # Iterate through rows
        for j in range(df.shape[1]):  # Iterate through columns
            if i != j:  # Only consider off-diagonal elements (remote latencies)
                remote_latency = df.iloc[i, j]
                local_latency = local_latencies[i]
                ratios.append(remote_latency / local_latency)

    # Compute the average NUMA factor
    average_numa_factor = sum(ratios) / len(ratios)

    return average_numa_factor

# Average remote/local latency ratio for the latency matrix in the report.
average_numa_factor = compute_numa_factor_with_pandas(data)
numa_factor_message = f"Average NUMA Factor (accesing remote memory is): {average_numa_factor} times slower than accesing local memory (in average)."
print(numa_factor_message)

Average NUMA Factor (accesing remote memory is): 1.0 times slower than accesing local memory (in average).


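```{math}
\text{NUMA metric} = \frac{1}{T \cdot Q} \sum_{i,j} t_{ij} \, q_{ij},
\qquad T = \sum_{i,j} t_{ij},
\qquad Q = \sum_{i,j} q_{ij}
```

Here $t_{ij}$ is the number of accesses from CPU node $i$ to memory node $j$, and $q_{ij}$ is the distance between nodes $i$ and $j$ with the diagonal (local distance) set to 0. The sum $\sum_{i,j} t_{ij} \, q_{ij}$ is the weighted sum computed in the code below.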

In [7]:
def compute_numa_metric(t_accesses_matrix, d_distance_matrix):
    """
    Computes the NUMA metric based on the provided matrices.

    The metric is ``weighted_sum / (T * Q)`` where the remote-distance matrix
    ``q`` is ``d_distance_matrix`` with its diagonal set to 0, ``T`` is the sum
    of all access counts, ``Q`` is the sum of ``q`` and ``weighted_sum`` is the
    sum of the element-wise product of the access matrix and ``q``.

    Parameters:
        t_accesses_matrix (pd.DataFrame): Matrix of task accesses.
        d_distance_matrix (pd.DataFrame): Matrix of distances. It is not
            modified.

    Returns:
        float: The calculated NUMA metric, or 0.0 when T or Q is not positive.
    """
    # Step 1: Build q_distance_matrix (distances with the diagonal set to 0).
    # The diagonal is zeroed on an explicit array copy: writing through
    # DataFrame.values is not reliable (it can be a temporary copy for mixed
    # dtypes, or read-only when pandas Copy-on-Write is active).
    remote_distances = d_distance_matrix.to_numpy().copy()
    np.fill_diagonal(remote_distances, 0)
    q_distance_matrix = pd.DataFrame(
        remote_distances,
        index=d_distance_matrix.index,
        columns=d_distance_matrix.columns,
    )

    # Step 2: Compute T and Q
    T = t_accesses_matrix.to_numpy().sum()  # Sum of elements in t_accesses_matrix
    Q = remote_distances.sum()              # Sum of elements in q_distance_matrix

    # Step 3: Compute weighted_sum (DataFrame product keeps label alignment)
    weighted_sum = (t_accesses_matrix * q_distance_matrix).to_numpy().sum()

    # Step 4: Compute the metric, guarding against division by zero
    if not (T > 0 and Q > 0):
        return 0.0
    return float((1 / (T * Q)) * weighted_sum)

# Example inputs: each dict key is a column label, each list a column.
t_accesses_data = {0: [10, 5, 2], 1: [3, 7, 4], 2: [1, 6, 8]}  # access counts
d_distance_data = {0: [1, 2, 4], 1: [2, 1, 3], 2: [4, 3, 1]}   # node distances

t_accesses_matrix, d_distance_matrix = (
    pd.DataFrame(columns_by_label)
    for columns_by_label in (t_accesses_data, d_distance_data)
)

# Evaluate the metric on the example matrices and report it.
numa_metric = compute_numa_metric(t_accesses_matrix, d_distance_matrix)
print(f"NUMA Metric: {numa_metric}")

NUMA Metric: 0.07004830917874395
