In [1]:
import math
import pandas as pd
from glob import glob
import os
import re

# Directory containing the LIKWID report files
report_dir = "./likwid_reports"
rep_files = glob(os.path.join(report_dir, "likwid_*_FULL.csv"))

In [2]:
# Find all .csv files in the current directory
rep_files = glob(os.path.join(".", "likwid_reps", "likwid*_FULL.csv"))
print(f"Found {len(rep_files)} report files.")
print(f"Report files: {rep_files}")

# Extract tile IDs from likwid_$TILE.csv filenames
tile_ids = [re.split(r"likwid_", f)[-1].split("_FULL.csv")[0] for f in rep_files]
print(f"Extracted tile ID: {tile_ids}")

# Parse each file and store the data in a list of DataFrames
dfs = []
for f, tile_id in zip(rep_files, tile_ids):
    df = pd.read_csv(f, skiprows=1)  # Skip the first row
    df["Tile"] = tile_id  # Add a column for the tile ID
    dfs.append(df)

Found 0 report files.
Report files: []
Extracted tile ID: []


In [None]:
# Loop through each DataFrame and calculate FLOPs and memory data volume
tflop_values = []
tbyte_values = []
runtime_values = []
for df in dfs:
    # filter out rows that contain "FP_ARITH_INST_RETIRED.*SINGLE" in the first column
    fp_df = df[df.iloc[:, 0].str.contains(r"FP_ARITH_INST_RETIRED.*SINGLE STAT", regex=True)]

    vector_widths = [4, 1, 8] # SP Packed 128, SP SCALAR, SP Packed 256
    total_flops = 0 
    for i, vw in enumerate(vector_widths):
        flops = float(fp_df.iloc[i, 2]) * vw
        total_flops += flops

    # Multiply by 2 for FMA and convert to TFLOPs
    total_flops *= 2 / 1e12

    # Filter out MEM result
    mem_df = df[df.iloc[:, 0].str.contains(r"Memory data volume.*STAT", regex=True)]
    mem_tb = float(mem_df.iloc[0, 1]) / 1024  # Convert to TB

    # Extract elapsed time value
    runtime = float(re.split(r":\s+", df.iloc[-1, 0])[-1])
    
    tflop_values.append(total_flops)
    tbyte_values.append(mem_tb)
    runtime_values.append(runtime)

# construct new dataframe with tile, tflops, tbytes, runtime
results_df = pd.DataFrame({
    "Tile": tile_ids,
    "Runtime": runtime_values,
    "TFLOPs": tflop_values,
    "TB": tbyte_values,
})
print(results_df)

# Average across tiles
avg_tflops = sum(tflop_values) / len(tflop_values)
avg_tbytes = sum(tbyte_values) / len(tbyte_values)
avg_runtime = sum(runtime_values) / len(runtime_values)

print("---------------------------------")
print(f"Average TFLOPs: {avg_tflops:.2f} TFLOPs")
print(f"Average TB: {avg_tbytes:.2f} TB")
print(f"Average Runtime: {avg_runtime:.2f} seconds")

Empty DataFrame
Columns: [Tile, Runtime, TFLOPs, TB]
Index: []


ZeroDivisionError: division by zero