In [None]:
import pandas as pd
import os
from itables import init_notebook_mode, show
init_notebook_mode(all_interactive=True)

# Profiler exports are read from <data_folder>/<subfolder>/<program>-<dataset>-<group>.csv
data_folder = 'data'
data_subfolders = ['baseline', 'scheduler-optimized']

# Per profiler group: (skiprows, nrows) handed to pd.read_csv so that only one
# table is cut out of each export. The offsets are tied to the layout of the
# CSV files currently in `data_folder`; re-check them if the exports change.
TABLE_LAYOUT = {
    'DATA': (8, 5),
    'L2': (24, 10),
    'L2CACHE': (22, 8),
    # 'TLB_DATA': (22, 8),  # L1 cache -- left disabled, as in the original cell
    'CYCLE_ACTIVITY': (26, 9),
}

# Nested result: data_frames[subfolder][dataset][profiler group] -> DataFrame
data_frames = {}

for data_sub in data_subfolders:
    folder = os.path.join(data_folder, data_sub)
    # Sorted so that the log output and the dict insertion order are
    # reproducible; os.listdir returns entries in arbitrary order.
    csv_files = sorted(f for f in os.listdir(folder) if f.endswith('.csv'))
    data_frames[data_sub] = {}

    for file in csv_files:
        # Expected stem: <program>-<dataset>-<profiler group>. Anything else is
        # reported and skipped instead of aborting the whole load with a
        # ValueError from the tuple unpacking.
        name_parts = file[:-4].split('-')
        if len(name_parts) != 3:
            print(f"Skipping {file}: name does not match <program>-<dataset>-<group>.csv")
            continue
        program, dataset, prof_group = name_parts

        file_path = os.path.join(folder, file)
        print(f"Reading folder={data_sub}, {dataset=}, {prof_group=}")
        per_dataset = data_frames[data_sub].setdefault(dataset, {})

        layout = TABLE_LAYOUT.get(prof_group)
        if layout is None:
            print(f"Profiler group {prof_group} not recognized")
            continue
        skip_rows, n_rows = layout
        per_dataset[prof_group] = pd.read_csv(file_path, skiprows=skip_rows, nrows=n_rows)

# Describe the data_frames dictionary: one short preview per
# folder / dataset / profiler group.
for folder, by_dataset in data_frames.items():
    print(f"Folder: {folder}")
    for dataset, by_group in by_dataset.items():
        print(f"Dataset: {dataset}")
        for group, df in by_group.items():
            print(f"\tProfiler group: {group}")
            # In place on purpose: the frames stored in data_frames lose their
            # all-NaN columns here, and later cells read these same objects.
            df.dropna(axis=1, how='all', inplace=True)
            leading_columns = df.columns[:5]
            print(df[leading_columns].head(n=10))
            print()

In [2]:
import matplotlib.pyplot as plt

# Figure size (width, height) in inches, passed as `figsize` to plt.subplots by
# the plot_* functions below; plot_CYCLE_ACTIVITY doubles the height because it
# draws two rows of panels.
FIG_SIZE = (24, 6)

def plot_L2(df, title):
    """Plot the L2 data volume as a bar chart, a histogram and a boxplot.

    Parameters
    ----------
    df : pandas.DataFrame
        Table whose first column is 'Metric'. Every column after it is treated
        as one value and drawn against a 'Core Index' axis. The table must
        contain a row whose 'Metric' is 'L2 data volume [GBytes]'.
    title : str
        Title placed above the three panels.

    Raises
    ------
    IndexError
        If no row with the expected metric name exists in ``df``.
    """
    metric_name = 'L2 data volume [GBytes]'
    matching_rows = df[df['Metric'] == metric_name]
    if matching_rows.empty:
        # Same exception type the bare .iloc[0] would raise, but with a message
        # that says which metric is missing and for which plot.
        raise IndexError(
            f"No row with Metric == {metric_name!r} in the table plotted as {title!r}"
        )

    # Cast to float *before* scaling: the row comes out of a mixed-type frame,
    # so multiplying first would fail on any cell that is still a string.
    l2_data_volume = matching_rows.iloc[0, 1:].astype(float) * 1e3  # GBytes -> MBytes
    l2_data_volume = l2_data_volume.to_list()

    # One figure, three side-by-side panels
    fig, axs = plt.subplots(1, 3, figsize=FIG_SIZE)
    fig.suptitle(title, fontsize=20)

    # Panel 1: one bar per value, indexed by position
    axs[0].bar(range(len(l2_data_volume)), l2_data_volume)
    axs[0].set_title('Barplot of L2 Data Volume')
    axs[0].set_xlabel('Core Index')
    axs[0].set_ylabel('L2 Data Volume [MBytes]')

    # Panel 2: distribution of the values
    axs[1].hist(l2_data_volume, bins=10, edgecolor='black')
    axs[1].set_title('Histogram of L2 Data Volume')
    axs[1].set_xlabel('L2 Data Volume [MBytes]')
    axs[1].set_ylabel('Count')

    # Panel 3: spread and outliers
    axs[2].boxplot(l2_data_volume, patch_artist=True, tick_labels=['L2 Data Volume'])
    axs[2].set_title('Boxplot of L2 Data Volume')
    axs[2].set_ylabel('L2 Data Volume [MBytes]')

    fig.tight_layout()
    plt.show()

def plot_L2CACHE(df, title):
    """Plot the 'L2 miss ratio' row of ``df`` as bar chart, histogram and boxplot.

    ``df`` must have a 'Metric' column in first position; all columns after it
    in the first matching row are converted to float and plotted. ``title`` is
    used as the figure-level title. The figure is shown, nothing is returned.
    """
    is_miss_ratio = df['Metric'] == 'L2 miss ratio'
    ratios = df[is_miss_ratio].iloc[0, 1:].astype(float).to_list()

    # Three panels next to each other, unpacked by role
    fig, (bar_ax, hist_ax, box_ax) = plt.subplots(1, 3, figsize=FIG_SIZE)
    fig.suptitle(title, fontsize=20)

    # Bar chart: one bar per value, x is the position in the row
    bar_ax.bar(range(len(ratios)), ratios)
    bar_ax.set(
        title='Barplot of L2 miss ratio',
        xlabel='Core Index',
        ylabel='L2 miss ratio',
    )

    # Histogram of the same values
    hist_ax.hist(ratios, bins=10, edgecolor='black')
    hist_ax.set(
        title='Histogram of L2 miss ratio',
        xlabel='L2 miss ratio',
        ylabel='Count',
    )

    # Boxplot of the same values
    box_ax.boxplot(ratios, patch_artist=True, tick_labels=['L2 miss ratio'])
    box_ax.set(
        title='Boxplot of L2 miss ratio',
        ylabel='L2 miss ratio',
    )

    fig.tight_layout()
    plt.show()

def plot_CYCLE_ACTIVITY(df, title):
    """Plot the L1D and L2 'cycles without execution' rows of ``df``.

    Draws a 2x3 grid: the top row shows 'Cycles without execution due to
    L1D [%]', the bottom row 'Cycles without execution due to L2 [%]', each as
    bar chart, histogram and boxplot. ``df`` must have a 'Metric' column in
    first position; the remaining columns of each matching row are plotted.
    ``title`` becomes the figure-level title. The figure is shown, nothing is
    returned.
    """
    def metric_row(metric):
        # First row whose 'Metric' equals `metric`, without the label column
        return df[df['Metric'] == metric].iloc[0, 1:]

    def draw_row(row_axes, values, bar_title, hist_title, box_title, axis_label, tick_label):
        # One grid row: bar chart, histogram and boxplot of the same values
        bar_ax, hist_ax, box_ax = row_axes

        bar_ax.bar(range(len(values)), values)
        bar_ax.set(title=bar_title, xlabel='Core Index', ylabel=axis_label)

        hist_ax.hist(values, bins=10, edgecolor='black')
        hist_ax.set(title=hist_title, xlabel=axis_label, ylabel='Count')

        box_ax.boxplot(values, patch_artist=True, tick_labels=[tick_label])
        box_ax.set(title=box_title, ylabel=axis_label)

    # Locate both rows first, then convert them to plain float lists
    l1d_row = metric_row('Cycles without execution due to L1D [%]')
    l2_row = metric_row('Cycles without execution due to L2 [%]')
    l1d_values = l1d_row.astype(float).to_list()
    l2_values = l2_row.astype(float).to_list()

    # Twice the usual height because there are two rows of panels
    fig, axs = plt.subplots(2, 3, figsize=(FIG_SIZE[0], FIG_SIZE[1] * 2))
    fig.suptitle(title, fontsize=20)

    draw_row(
        axs[0],
        l1d_values,
        'Barplot of L1D Cycles w/o execution',
        'Histogram of L1D Cycles w/o execution',
        'Boxplot of L1D Cycles w/o execution',
        'L1D Cycles w/o execution [%]',
        'L1D Cycles w/o execution',
    )
    draw_row(
        axs[1],
        l2_values,
        'Barplot of L2 Cycles w/o execution',
        'Histogram of L2 Cycles w/o execution',
        'Boxplot of L2 Cycles w/o execution',
        'L2 Cycles w/o execution [%]',
        'L2 Cycles w/o execution',
    )

    fig.tight_layout()
    plt.show()


def plot_DATA(df, title):
    """Plot the MEM_INST_RETIRED_ALL_LOADS row of ``df`` in three panels.

    ``df`` must have an 'Event' column; in the first row whose 'Event' is
    'MEM_INST_RETIRED_ALL_LOADS', the columns from the third one onward are
    converted to int and drawn as bar chart, histogram and boxplot. ``title``
    becomes the figure-level title. The figure is shown, nothing is returned.
    """
    is_load_event = df['Event'] == 'MEM_INST_RETIRED_ALL_LOADS'
    # The first two columns are skipped; everything after them is a count
    load_counts = df[is_load_event].iloc[0, 2:].astype(int).to_list()

    # Three panels next to each other, unpacked by role
    fig, (bar_ax, hist_ax, box_ax) = plt.subplots(1, 3, figsize=FIG_SIZE)
    fig.suptitle(title, fontsize=20)

    # Bar chart: one bar per count, x is the position in the row
    bar_ax.bar(range(len(load_counts)), load_counts)
    bar_ax.set(
        title='Barplot of MEM_INST_RETIRED_ALL_LOADS',
        xlabel='Core Index',
        ylabel='MEM_INST_RETIRED_ALL_LOADS',
    )

    # Histogram of the same counts
    hist_ax.hist(load_counts, bins=10, edgecolor='black')
    hist_ax.set(
        title='Histogram of MEM_INST_RETIRED_ALL_LOADS',
        xlabel='MEM_INST_RETIRED_ALL_LOADS',
        ylabel='Count',
    )

    # Boxplot of the same counts
    box_ax.boxplot(load_counts, patch_artist=True, tick_labels=['LD_INSTR'])
    box_ax.set(
        title='Boxplot of MEM_INST_RETIRED_ALL_LOADS',
        ylabel='MEM_INST_RETIRED_ALL_LOADS',
    )

    fig.tight_layout()
    plt.show()


In [None]:
# What to plot: every profiler group, for every dataset, in every folder
folders = ['baseline', 'scheduler-optimized']
datasets = ['Collaboration_Network_1', 'kNN_Graph_1']
groups = ['CYCLE_ACTIVITY', 'L2CACHE', 'DATA', 'L2']

# Plotting routine for each supported profiler group
plotters = {
    'DATA': plot_DATA,
    'L2': plot_L2,
    'L2CACHE': plot_L2CACHE,
    'CYCLE_ACTIVITY': plot_CYCLE_ACTIVITY,
}

for group in groups:
    plotter = plotters.get(group)
    for dataset in datasets:
        # Decide on skipping before touching data_frames, so that a 'random'
        # dataset that was never loaded cannot raise a KeyError first.
        if 'random' in dataset:
            continue
        for folder in folders:
            # Raises KeyError if this folder/dataset/group was not loaded
            df: pd.DataFrame = data_frames[folder][dataset][group]
            title = f'Folder: {folder} -- Dataset: {dataset} -- Profiler group: {group}'
            if plotter is None:
                print(f"Profiler group {group} not supported")
                continue
            plotter(df, title)