In [1]:
import subprocess
from pathlib import Path

import pandas as pd

from deep_mca.utils import disassemble_hex, wrap_asm

In [2]:
raw_data_path = Path("../data/bhive/benchmark/throughput/skl.csv")

# The CSV is read without a header row, so the two columns are named here.
# The cleaning steps run as one chain:
#   1. normalise the hex string (missing -> "", cast to str, trim whitespace),
#   2. drop rows whose hex string is empty,
#   3. coerce the cycle count to a number (unparseable values become NaN),
#   4. drop rows without a numeric cycle count and renumber the index.
df = (
    pd.read_csv(raw_data_path, header=None, names=["hex", "cycles_100"])
    .assign(hex=lambda frame: frame["hex"].fillna("").astype(str).str.strip())
    .loc[lambda frame: frame["hex"] != ""]
    .assign(cycles_100=lambda frame: pd.to_numeric(frame["cycles_100"], errors="coerce"))
    .dropna(subset=["cycles_100"])
    .reset_index(drop=True)
)

In [3]:
# (rows, columns) remaining after empty hex strings and non-numeric cycle counts were dropped.
df.shape

(314876, 2)

In [4]:
# Preview the first rows: the hex string of each block and its `cycles_100` value.
df.head()

Unnamed: 0,hex,cycles_100
0,4183ff0119c083e00885c98945c4b8010000000f4fc139c2,249.0
1,4889de4889c24c89ff,91.0
2,48895d1844886520488945004889e84883c4085b5d415c...,330.0
3,0fb7d5448d40ff8d0cd28d348a4421c689f14c8d0ccd00...,361.0
4,418b4424084d8b3424498d2cc64939ee,100.0


In [5]:
def run_llvm_mca(asm: str, mcpu: str = "skylake", iterations: int = 100) -> float:
    """Run ``llvm-mca`` on an assembly snippet and return its Block RThroughput.

    The assembly text is fed to ``llvm-mca`` on stdin, and the value is parsed
    from the first output line that contains ``Block RThroughput:``.

    Args:
        asm: Assembly source passed to ``llvm-mca`` via stdin.
        mcpu: Value for the ``-mcpu=`` flag.
        iterations: Value for the ``-iterations=`` flag.

    Returns:
        The number reported after ``Block RThroughput:``, as a float.

    Raises:
        RuntimeError: If ``llvm-mca`` exits with a non-zero status (the message
            is its stripped stderr), or if no ``Block RThroughput:`` line is
            present in its stdout.
    """
    result = subprocess.run(
        ["llvm-mca", "-mtriple=x86_64", f"-mcpu={mcpu}", f"-iterations={iterations}"],
        input=asm,
        text=True,
        capture_output=True,
        # The exit status is inspected manually so stderr can become the error message.
        check=False,
    )
    if result.returncode != 0:
        raise RuntimeError(result.stderr.strip())

    # Only the first matching line matters; the generator stops scanning there.
    matches = (text for text in result.stdout.splitlines() if "Block RThroughput:" in text)
    summary = next(matches, None)
    if summary is None:
        raise RuntimeError("llvm-mca output missing Block RThroughput")

    return float(summary.split(":")[1].strip())

In [6]:
def analyze_row(row, mcpu: str = "skylake", iterations: int = 100, intel: bool = False):
    """Print the measured cycle count of one row next to llvm-mca's estimate.

    The row's hex string is disassembled, wrapped with ``wrap_asm`` and passed
    to ``run_llvm_mca``. The two cycle counts are printed, followed by the
    disassembled instructions, one per line.

    Args:
        row: Mapping-like record with ``"hex"`` and ``"cycles_100"`` entries
            (e.g. one row of ``df``).
        mcpu: CPU model forwarded to ``run_llvm_mca``.
        iterations: Number of simulated iterations forwarded to ``run_llvm_mca``.
        intel: Forwarded to ``disassemble_hex`` as ``output_intel_syntax``.

    Returns:
        None. The comparison is written to stdout.
    """
    asm_lines = disassemble_hex(row["hex"], output_intel_syntax=intel)
    asm = wrap_asm(asm_lines)
    rthroughput = run_llvm_mca(asm, mcpu=mcpu, iterations=iterations)

    # `row["cycles_100"]` is taken to be a per-100-iterations measurement (per
    # its name), so the per-iteration RThroughput is scaled by a fixed 100.
    # Scaling by `iterations` would put the two printed numbers on different
    # scales whenever `iterations != 100`.
    mca_cycles_100 = rthroughput * 100

    print(f"True cycles: {row['cycles_100']}, llvm-mca cycles: {mca_cycles_100}")
    print("Source code:")
    for line in asm_lines:
        print(line)

In [7]:
# Print measured vs. llvm-mca cycles and the disassembly for the row at position 0.
analyze_row(df.iloc[0])

True cycles: 249.0, llvm-mca cycles: 130.0
Source code:
cmpl	$1, %r15d
sbbl	%eax, %eax
andl	$8, %eax
testl	%ecx, %ecx
movl	%eax, -60(%rbp)
movl	$1, %eax
cmovgl	%ecx, %eax
cmpl	%eax, %edx


In [8]:
# Print measured vs. llvm-mca cycles and the disassembly for the row at position 1.
analyze_row(df.iloc[1])

True cycles: 91.0, llvm-mca cycles: 80.0
Source code:
movq	%rbx, %rsi
movq	%rax, %rdx
movq	%r15, %rdi


In [9]:
# Print measured vs. llvm-mca cycles and the disassembly for the row at position 2.
analyze_row(df.iloc[2])

True cycles: 330.0, llvm-mca cycles: 300.0
Source code:
movq	%rbx, 24(%rbp)
movb	%r12b, 32(%rbp)
movq	%rax, (%rbp)
movq	%rbp, %rax
addq	$8, %rsp
popq	%rbx
popq	%rbp
popq	%r12
popq	%r13
