In [None]:
import os
import re

# Define the path to the data folder
data_folder = 'data_profiling_perf'

# Parsed results, nested as data[program][threads][dataset] -> {metric: value}.
data = {}
# One counter per line: an integer with optional thousands separators,
# whitespace, then the counter name; the rest of the line is ignored.
pattern = re.compile(r'^\s*([\d,]+)\s+([\w\.-]+).*$', re.MULTILINE)

# Derived metric -> (numerator counter, denominator counter).
# The "ratio" metrics divide a miss counter by the matching access counter;
# the "rate" metrics divide the same miss counter by mem_inst_retired.all_loads.
derived_metrics = {
    # 'cache-miss-ratio': ('cache-misses', 'cache-references'),
    'L1-miss-ratio': ('L1-dcache-load-misses', 'L1-dcache-loads'),
    'L2-miss-ratio': ('l2_rqsts.demand_data_rd_miss', 'l2_rqsts.all_demand_data_rd'),
    'L3-miss-ratio': ('LLC-load-misses', 'LLC-loads'),
    'L1-miss-rate': ('L1-dcache-load-misses', 'mem_inst_retired.all_loads'),
    'L2-miss-rate': ('l2_rqsts.demand_data_rd_miss', 'mem_inst_retired.all_loads'),
    'L3-miss-rate': ('LLC-load-misses', 'mem_inst_retired.all_loads'),
}
# Every raw counter that must be present in a log for the table above.
required_counters = {name for pair in derived_metrics.values() for name in pair}

# Sort the listing so the insertion order of `data` (and anything printed
# from it) does not depend on the order the filesystem returns entries in.
for f in sorted(os.listdir(data_folder)):
    if not f.endswith('.log'):
        continue

    # File names look like "<program>-<threads>-<dataset>.log". Split on the
    # first two dashes only, so a dataset name may itself contain '-', and
    # drop the extension as a suffix instead of every '.log' occurrence.
    program, threads, dataset = os.path.splitext(f)[0].split('-', 2)
    threads = int(threads)

    # Parse the file content
    with open(os.path.join(data_folder, f), 'r') as file:
        matches = pattern.findall(file.read())

    # Raw counters, with the thousands separators removed. If a counter name
    # appears more than once in a log, the last occurrence wins.
    _data = {metric: int(value.replace(',', '')) for value, metric in matches}

    # Fail with the file name when a counter is absent, rather than a bare
    # KeyError on the counter name alone.
    missing = sorted(required_counters - _data.keys())
    if missing:
        raise KeyError(f'{f}: perf counters not found in log: {missing}')

    # A zero denominator still raises ZeroDivisionError, as before.
    for name, (numerator, denominator) in derived_metrics.items():
        _data[name] = _data[numerator] / _data[denominator]

    data.setdefault(program, {}).setdefault(threads, {})[dataset] = _data
    

# Dump everything that was parsed: program -> thread count -> dataset,
# followed by the dict of raw counters and derived metrics for that run.
for program in data:
    print(program)
    for threads in data[program]:
        print(f'  {threads} threads')
        for dataset in data[program][threads]:
            print(f'    {dataset}')
            print(data[program][threads][dataset])
            print()

In [None]:
import numpy as np
import matplotlib.pyplot as plt

# Font sizes shared by every figure in this cell.
FONT_TITLE = 18
FONT_AXES = 18
FONT_TICKS = 14
FONT_LEGEND = 14

# Datasets shown on the x axis, in plotting order.
datasets = [
    # 'Social_Network_1',
    # 'Web_Graph_1',
    # 'Collaboration_Network_1',
    # 'Synthetic_Dense_1',
    'Road_Network_1',
    'Road_Network_2',
    'kNN_Graph_1',
    'Synthetic_Sparse_1',
]

# One bar per program inside each dataset group.
programs = ['large', 'classic']

# Bar geometry does not depend on the metric, so compute it once.
x = np.arange(len(datasets))  # the label locations
width = 0.2  # the width of the bars

# One grouped bar chart per (thread count, metric) pair.
for threads in [32]:
    for kind in ['rate', 'ratio']:
        for level in ['L1-miss', 'L2-miss', 'L3-miss']:
            metric = level + '-' + kind
            # Extract the metric for each dataset and program
            cache_miss_ratios = {
                program: [data[program][threads][dataset][metric] for dataset in datasets]
                for program in programs
            }

            fig, ax = plt.subplots(figsize=(12, 6))

            # Bars are centred on x + i * width (matplotlib's default alignment).
            for i, program in enumerate(programs):
                ax.bar(x + i * width, cache_miss_ratios[program], width, label=program)

            # The axis label uses the axis font size, not the tick font size.
            ax.set_ylabel(metric, fontsize=FONT_AXES)
            ax.set_title(f'{metric} for {threads} threads', fontsize=FONT_TITLE)
            # Centre of a group of n bars is half-way between the first and
            # the last bar centre: x + width * (n - 1) / 2. This stays
            # correct when more programs are added.
            ax.set_xticks(x + width * (len(programs) - 1) / 2)
            ax.set_xticklabels(datasets, rotation=45, fontsize=FONT_TICKS-2)
            ax.legend(loc='best', fontsize=FONT_LEGEND)
            ax.grid(True, linestyle=':')

            fig.tight_layout()
            plt.show()
            # Release the figure so figures created in the loop do not pile up.
            plt.close(fig)

In [None]:
from scipy.stats import gmean
import pandas as pd

datasets = [
    # 'Social_Network_1',
    # 'Web_Graph_1',
    # 'Collaboration_Network_1',
    # 'Synthetic_Dense_1',
    'Road_Network_1',
    'Road_Network_2',
    'kNN_Graph_1',
    'Synthetic_Sparse_1',
]
programs = ['large', 'classic']
levels = ['L1', 'L2', 'L3']
# All ratio metrics first, then all rate metrics, each ordered L1..L3.
metrics = [f'{level}-miss-{kind}' for kind in ('ratio', 'rate') for level in levels]

threads = 24

# geometric_means[program][metric]: geometric mean of the metric over `datasets`.
geometric_means = {
    program: {
        metric: gmean([data[program][threads][dataset][metric] for dataset in datasets])
        for metric in metrics
    }
    for program in programs
}

# Relative change of 'large' with respect to 'classic', in percent:
# -((1 - large / classic) * 100), so a negative value means 'large' is lower.
relative_improvements = {
    metric: -((1 - geometric_means['large'][metric] / geometric_means['classic'][metric]) * 100)
    for metric in metrics
}

# Append the per-program geometric means, scaled to percent, under
# '<program>-<metric>' keys.
relative_improvements.update({
    f'{program}-{metric}': geometric_means[program][metric] * 100
    for program in programs
    for metric in metrics
})

# Single 'Geomean' column with one row per key of the dictionary above.
improvement_df = pd.DataFrame(relative_improvements, index=['Geomean']).T

# Output column title -> (row-label prefix, metric kind). Each column picks
# the L1/L2/L3 rows of improvement_df that share that prefix and kind.
column_specs = {
    r'Mer Rate': ('large-', 'rate'),
    r'Mer Ratio': ('large-', 'ratio'),
    'Ref Rate': ('classic-', 'rate'),
    'Ref Ratio': ('classic-', 'ratio'),
    'Rate impr.': ('', 'rate'),
    'Ratio impr.': ('', 'ratio'),
}

# One row per cache level, one column per entry of column_specs.
reshaped_df = pd.DataFrame(
    {
        title: improvement_df.loc[
            [f'{prefix}{level}-miss-{kind}' for level in levels], 'Geomean'
        ].values
        for title, (prefix, kind) in column_specs.items()
    },
    index=levels,
)

# Show the table as plain text, then as LaTeX with the percent sign escaped.
print(reshaped_df.map(lambda x: f"{x:.1f}%"))
print()
print(reshaped_df.map(lambda x: f"{x:.1f}"+r"\%").to_latex())