In [1]:
import os

import pandas as pd

In [2]:
from glob import glob

# Default benchmark folder.
folder_name = 'parallel_reduction1_benchmarks'

# Input sizes for which a metrics row is built.
sizes = [1024, 1_000_000, 1_000_000_000, 2_000_000_000]

# Base names (without extension) of the report files collected per input size.
file_names = [
    "output",
    "ncu_basic",
    "ncu_memory_usage",
    "ncu_warp_efficiency",
    "ncu_threaddiv",
]


def key_func(file, file_dict):
    """Register one report file in ``file_dict``, grouped by input size.

    The input size is the integer after the last ``_`` in the name of the
    directory that directly contains ``file``. The file is recorded under its
    base name (the text before the first ``.``) only when that base name is
    listed in ``file_names``; an entry for the size is created either way.

    Args:
        file: path of a report file, e.g. ``'<folder>/<method>_<size>/output.txt'``.
        file_dict: mapping ``{size: {base_name: path}}``; updated in place.

    Returns:
        The same ``file_dict`` object, after the update.

    Raises:
        ValueError: if the containing directory name does not end in ``_<int>``.
    """
    # os.path handles whichever separator the platform's glob returns, where
    # splitting on a hard-coded '/' fails for backslash-separated paths.
    parent_dir, base_name = os.path.split(file)
    size_dir = os.path.basename(parent_dir)

    key = int(size_dir.split('_')[-1])
    per_size_files = file_dict.setdefault(key, {})

    file_name = base_name.split('.')[0]
    if file_name in file_names:
        per_size_files[file_name] = file

    return file_dict



In [3]:
def read_file_contents(file):
    """Return the whole content of the text file at ``file`` as a single string."""
    with open(file, mode='r') as handle:
        contents = handle.read()
    return contents
    

def get_line_with_text(text, line):
    """Return the first line of ``text`` that contains the substring ``line``.

    ``text`` is split on ``'\\n'``; the first matching line is returned as is.
    Returns ``None`` when no line contains ``line``.
    """
    matching_lines = (candidate for candidate in text.split('\n') if line in candidate)
    return next(matching_lines, None)


In [4]:
# Names of the metrics recorded by get_metrics, in the order it records them.
# The eleventh entry is 'SM Throughput': that is the key get_metrics stores
# (read from the 'Compute (SM) Throughput' line); it never stores 'SM Active Cycles'.
metric_names = [
    'cudaEventRecord',
    'Duration',
    'DRAM mem',
    'L1 mem',
    'Warp exec %',
    'Warp avg threads',
    'Mem thrpt',
    'DRAM thrpt',
    'L1 thrpt',
    'L2 thrpt',
    'SM Throughput',
    'Occupancy',
    'Branch eff',
]

def _require_line(report_text, label, report_name):
    """Return the first line of ``report_text`` containing ``label``.

    Raises:
        ValueError: if no line contains ``label``; the message names the label
            and the report so a missing metric is easy to trace.
    """
    found = get_line_with_text(report_text, label)
    if found is None:
        raise ValueError(f"'{label}' not found in the '{report_name}' report")
    return found


def get_metrics(size_wise_files, size):
    """Collect the metrics for one input size from its report files.

    Args:
        size_wise_files: mapping with the keys 'output', 'ncu_basic',
            'ncu_memory_usage', 'ncu_warp_efficiency' and 'ncu_threaddiv',
            each holding the path of the corresponding text report.
        size: the input size; stored under the 'Input Size' key.

    Returns:
        dict mapping 'Input Size' and each metric name to the string token(s)
        taken from the matching report line. Values are not converted to numbers.

    Raises:
        KeyError: if one of the report paths is missing from ``size_wise_files``.
        ValueError: if an expected label is not found in its report.
    """
    metrics = {'Input Size': size}

    # Each report is read from the file of the same name. In particular the
    # 'ncu_basic' text comes from the 'ncu_basic' file, not from 'ncu_threaddiv'.
    reports = {
        report_name: read_file_contents(size_wise_files[report_name])
        for report_name in (
            'output',
            'ncu_basic',
            'ncu_memory_usage',
            'ncu_warp_efficiency',
            'ncu_threaddiv',
        )
    }

    def tokens_of(report_name, label):
        # Whitespace-separated tokens of the first line containing `label`.
        return _require_line(reports[report_name], label, report_name).split()

    # The timing line is split on single spaces and its second-to-last field kept.
    timing_line = _require_line(reports['output'], 'Kernel Execution Time:', 'output')
    metrics['cudaEventRecord'] = timing_line.split(' ')[-2]

    # Metrics stored as "<last token> <second-to-last token>"
    # (presumably value followed by unit -- confirm against the report layout).
    two_token_metrics = [
        ('Duration', 'ncu_basic', 'Duration'),
        ('DRAM mem', 'ncu_memory_usage', 'dram__bytes_read.sum'),
        ('L1 mem', 'ncu_memory_usage', 'l1tex__t_bytes.sum'),
    ]
    for metric_name, report_name, label in two_token_metrics:
        tokens = tokens_of(report_name, label)
        metrics[metric_name] = tokens[-1] + ' ' + tokens[-2]

    # Metrics stored as the last token of their line only. The order here fixes
    # the key order of the returned dict.
    last_token_metrics = [
        ('Warp exec %', 'ncu_warp_efficiency', 'smsp__thread_inst_executed_per_inst_executed.pct'),
        ('Warp avg threads', 'ncu_warp_efficiency', 'smsp__thread_inst_executed_per_inst_executed.ratio'),
        ('Mem thrpt', 'ncu_threaddiv', 'Memory Throughput'),
        ('DRAM thrpt', 'ncu_threaddiv', 'DRAM Throughput'),
        ('L1 thrpt', 'ncu_threaddiv', 'L1/TEX Cache Throughput'),
        ('L2 thrpt', 'ncu_threaddiv', 'L2 Cache Throughput'),
        ('SM Throughput', 'ncu_threaddiv', 'Compute (SM) Throughput'),
        ('Occupancy', 'ncu_threaddiv', 'Achieved Occupancy'),
        ('Branch eff', 'ncu_threaddiv', 'Branch Efficiency'),
    ]
    for metric_name, report_name, label in last_token_metrics:
        metrics[metric_name] = tokens_of(report_name, label)[-1]

    print(metrics)

    return metrics



In [5]:
def get_folder_metrics(folder_name):
    """Build, save and return the metrics table for one benchmark folder.

    The method name is the folder's last path component with its final
    ``_``-separated part removed. One row is built per entry of ``sizes`` and
    the table is written to ``output_metrics/<method_name>.csv``.

    Args:
        folder_name: path of a benchmark folder whose sub-directories hold the
            ``.txt`` report files.

    Returns:
        pandas.DataFrame indexed by 'Input Size', one row per entry of ``sizes``.

    Raises:
        KeyError: if no report files were found for one of the ``sizes``.
    """
    # normpath drops a trailing separator, which would otherwise leave an empty
    # last component and therefore an empty method name.
    folder_base = os.path.basename(os.path.normpath(folder_name))
    method_name = '_'.join(folder_base.split('_')[:-1])

    # glob is called without recursive=True, so '**' matches a single directory
    # level: only .txt files one level below folder_name are collected.
    files = glob(f'{folder_name}/**/*.txt')

    file_dict = {}
    for file in files:
        file_dict = key_func(file, file_dict)

    print("Files: ", file_dict)

    rows = []
    for size in sizes:
        print(f"Size: {size}")
        if size not in file_dict:
            raise KeyError(
                f"no benchmark files found for input size {size} under '{folder_name}'"
            )
        rows.append(get_metrics(file_dict[size], size))

    df = pd.DataFrame(rows).set_index('Input Size')

    # Create the output directory first; to_csv does not create missing directories.
    os.makedirs('output_metrics', exist_ok=True)
    df.to_csv(f'output_metrics/{method_name}.csv')
    return df

In [6]:
# Show the entries in the current working directory whose names end in 'benchmarks'.
glob('*benchmarks')

['parallel_reduction4_benchmarks',
 'parallel_reduction2a_benchmarks',
 'parallel_reduction2b_benchmarks',
 'parallel_reduction2_benchmarks',
 'llm_based_gpt4o_mini_benchmarks',
 'parallel_reduction3_benchmarks',
 'parallel_reduction3a_benchmarks',
 'parallel_reduction2c_benchmarks',
 'parallel_reduction1_benchmarks',
 'parallel_reduction3b_benchmarks']

In [7]:
# Benchmark folders to process, in processing order.
folders = [
    'parallel_reduction1_benchmarks',
    'parallel_reduction2a_benchmarks',
    'parallel_reduction2b_benchmarks',
    'parallel_reduction2c_benchmarks',
    'parallel_reduction3a_benchmarks',
    'parallel_reduction3b_benchmarks',
    'parallel_reduction4_benchmarks',
    'llm_based_gpt4o_mini_benchmarks',
]

In [8]:
# Build, save and print the metrics table of every folder listed in `folders`.
# After the loop, `folder` and `df` hold the last folder name and its table.
for folder in folders:
    print(f"Processing folder: {folder}")
    df = get_folder_metrics(folder)
    print(df)
    # Blank lines separating one folder's output from the next.
    print("\n")

Processing folder: parallel_reduction1_benchmarks
Files:  {1024: {'ncu_threaddiv': 'parallel_reduction1_benchmarks/parallel_reduction1_1024/ncu_threaddiv.txt', 'ncu_basic': 'parallel_reduction1_benchmarks/parallel_reduction1_1024/ncu_basic.txt', 'output': 'parallel_reduction1_benchmarks/parallel_reduction1_1024/output.txt', 'ncu_memory_usage': 'parallel_reduction1_benchmarks/parallel_reduction1_1024/ncu_memory_usage.txt', 'ncu_warp_efficiency': 'parallel_reduction1_benchmarks/parallel_reduction1_1024/ncu_warp_efficiency.txt'}, 1000000000: {'ncu_threaddiv': 'parallel_reduction1_benchmarks/parallel_reduction1_1000000000/ncu_threaddiv.txt', 'ncu_basic': 'parallel_reduction1_benchmarks/parallel_reduction1_1000000000/ncu_basic.txt', 'output': 'parallel_reduction1_benchmarks/parallel_reduction1_1000000000/output.txt', 'ncu_memory_usage': 'parallel_reduction1_benchmarks/parallel_reduction1_1000000000/ncu_memory_usage.txt', 'ncu_warp_efficiency': 'parallel_reduction1_benchmarks/parallel_reduct