In [2]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from pathlib import Path

# --- Architectural and Roofline Constants ---
# Hardware parameters from which the compute roofs (peak GFLOPs/s) and the
# memory roof (bandwidth) of the Roofline plots below are derived.
CLK_FREQUENCY_HZ = 0.94 * 1e9  # 940 MHz
BYTES_PER_ELEMENT = 2          # Assuming FP16 or BF16 for data movement
# NOTE(review): BYTES_PER_ELEMENT is not referenced in the visible part of this file.

# 2D Array Configuration
PE_2D_ROWS = 256
PE_2D_COLS = 256  # Also passed as M0_spatial_dim to the attention FLOP estimator below.
TOTAL_2D_PES = PE_2D_ROWS * PE_2D_COLS
FLOPS_PER_PE_PER_CYCLE_2D = 2  # Assumed FLOPs (e.g., FMA = 2 FLOPs) per 2D PE per cycle
# Peak = PEs * FLOPs/PE/cycle * cycles/s, scaled from FLOPs/s to GFLOPs/s.
PEAK_2D_PERFORMANCE_GFLOPS = (TOTAL_2D_PES * FLOPS_PER_PE_PER_CYCLE_2D * CLK_FREQUENCY_HZ) / 1e9

# 1D Array Configuration
TOTAL_1D_PES = 256
FLOPS_PER_PE_PER_CYCLE_1D = 2  # Assumed FLOPs for general 1D PE ops; specialized units might differ
# Same formula as the 2D peak, applied to the 1D array.
PEAK_1D_PERFORMANCE_GFLOPS = (TOTAL_1D_PES * FLOPS_PER_PE_PER_CYCLE_1D * CLK_FREQUENCY_HZ) / 1e9

# Memory System
MEMORY_BANDWIDTH_GB_S = 400
MEMORY_BANDWIDTH_B_S = MEMORY_BANDWIDTH_GB_S * 1e9 # Bytes per second

# --- Model Core Architectural Parameters ---
# (B: Batch, D: Hidden Size, H: Heads, E: Q/K Head Dim, F: V Head Dim, S: FFN Intermediate Size)
# num_layers is for calculating 'Overall' model performance.
# In both dicts D == H * E and S == 4 * D.
# NOTE(review): "name" and "num_layers" are not read in the visible part of this file.
BERT_BASE_PARAMS = {
    "name": "BERT-base", "num_layers": 12,
    "B": 64, "D": 768, "H": 12, "E": 64, "F": 64, "S": 3072
}
OPT_9B_PARAMS = {
    "name": "OPT-9B", "num_layers": 64, # From user-provided config
    "B": 64, "D": 9216, "H": 72, "E": 128, "F": 128, "S": 36864
}

# --- FLOP Calculation Functions ---

def calculate_fusemax_attn_flops_one_layer(B, H, M_query, P_kv, E_head, F_head, M0_spatial_dim):
    """
    Estimate the FLOPs of one FuseMax attention layer (Cascade 5 from the paper).

    Args:
        B: Batch size.
        H: Number of attention heads.
        M_query: Query sequence length (Transformer.M in the simulator code).
        P_kv: Key/Value sequence length (Transformer.P in the simulator code).
        E_head: Per-head Q/K dimension.
        F_head: Per-head V dimension.
        M0_spatial_dim: Size of the M0 dimension mapped spatially
            (e.g., PE_2D_COLS). A value of 0 is replaced by 1 and a warning
            is printed.

    Returns:
        The summed FLOP estimate. It is a float because the outer M extent is
        computed with true division (M_query / M0_spatial_dim).

    FLOP counting assumptions:
        - A MAC (multiply-accumulate) counts as 2 FLOPs.
        - Element-wise add, subtract, max, product and divide count as 1 FLOP.
        - exp counts as 6 MACs = 12 FLOPs (per the FuseMax paper discussion).
    """
    if M0_spatial_dim == 0:
        M0_spatial_dim = 1
        print("Warning: M0_spatial_dim was 0, set to 1 in FLOPs calculation.")

    # Cost charged per element of the exponentiation Einsums: (1 + 12).
    # Presumably one subtract plus one exp -- confirm against the paper.
    exp_cost = 1 + 12

    m_outer = M_query / M0_spatial_dim      # number of M0-sized tiles along M
    batch_heads = B * H
    full_grid = batch_heads * M_query * P_kv   # iteration space over (m, p)
    tile_grid = batch_heads * m_outer * P_kv   # iteration space over (m1, p)

    # One entry per Einsum, listed in the order they are accumulated.
    einsum_costs = (
        ("E44: BQK", 2 * full_grid * E_head),
        ("E45: LM", tile_grid * M0_spatial_dim),
        ("E46: RM", tile_grid),
        ("E47: SLN", full_grid * exp_cost),
        ("E48: SLD", tile_grid * M0_spatial_dim),
        ("E49: SLNV", 2 * tile_grid * F_head * M0_spatial_dim),
        ("E50: PRM", tile_grid * exp_cost),
        ("E51: SPD", tile_grid),
        ("E52: RD_new", tile_grid),
        ("E53: SPNV", tile_grid * F_head),
        ("E54: RNV_new", tile_grid * F_head),
        ("E55: AV", batch_heads * P_kv * F_head),
    )

    # Accumulate left to right with plain additions so the floating-point
    # result does not depend on the summation strategy of a helper.
    total_flops = 0
    for _einsum_name, cost in einsum_costs:
        total_flops += cost
    return total_flops

def calculate_mlp_linear_flops_one_layer(B, SeqLen, D, S, H, E_head, F_head):
    """
    Count the FLOPs of the linear (matmul) parts of one transformer layer.

    Every projection is costed as 2 FLOPs per multiply-accumulate:
    Q, K and V projections (D -> D each), the attention output projection
    (D -> D), and the two feed-forward matmuls (D -> S and S -> D).

    Args:
        B: Batch size.
        SeqLen: Sequence length.
        D: Hidden size.
        S: FFN intermediate size.
        H, E_head, F_head: Accepted but not used by this calculation.

    Returns:
        Total FLOPs of the six matmuls.
    """
    # 2 FLOPs per MAC for every token in the batch.
    flops_per_weight = 2 * B * SeqLen

    square_projection = flops_per_weight * D * D
    qkv_projections = 3 * square_projection
    output_projection = square_projection
    ffn_expand = flops_per_weight * D * S    # FFN1 (D -> S)
    ffn_contract = flops_per_weight * S * D  # FFN2 (S -> D)

    return qkv_projections + output_projection + ffn_expand + ffn_contract

def get_sim_data(csv_path, model_key_to_match, seq_key_to_match):
    """
    Look up traffic and latency for one (model, sequence) scenario in a CSV.

    The model column is 'model_key' when present, otherwise 'model'; the
    sequence column is 'seq_key' when present, otherwise 'seq_len'. The first
    row matching both keys is used, and its 'traffic' and 'latency' cells are
    converted to float. Callers in this file treat them as bytes and seconds.

    Args:
        csv_path: Path of the simulation results CSV.
        model_key_to_match: Value to match in the model column.
        seq_key_to_match: Value to match in the sequence column.

    Returns:
        (traffic, latency) as floats. A latency of exactly 0 is reported as
        float('inf') after printing a warning. (None, None) is returned, after
        printing a message, when no row matches, the file does not exist, a
        required column is missing, or any other error occurs while reading.
    """
    no_data = (None, None)
    try:
        table = pd.read_csv(csv_path)

        # Accept either naming scheme for the two key columns.
        column_names = table.columns
        model_col = 'model_key' if 'model_key' in column_names else 'model'
        seq_col = 'seq_key' if 'seq_key' in column_names else 'seq_len'

        is_model = table[model_col] == model_key_to_match
        is_seq = table[seq_col] == seq_key_to_match
        matches = table[is_model & is_seq]

        if matches.empty:
            print(f"Warning: Data for {model_key_to_match}, {seq_key_to_match} not found in {csv_path}")
            return no_data

        # Only the first matching row is consulted.
        traffic_bytes = float(matches['traffic'].iloc[0])
        latency_s = float(matches['latency'].iloc[0])
        if latency_s == 0:
            print(f"Warning: Latency is 0 for {model_key_to_match}, {seq_key_to_match}. GFLOPs/s will be Inf/NaN.")
            latency_s = float('inf')
        return traffic_bytes, latency_s
    except FileNotFoundError:
        print(f"Error: CSV file not found at {csv_path}")
    except KeyError as e:
        print(f"Error: Column {e} not found in {csv_path}. Ensure CSV has required columns.")
    except Exception as e:
        # Best-effort reader: any other failure is reported and treated as "no data".
        print(f"An error occurred while reading CSV for {model_key_to_match}, {seq_key_to_match}: {e}")
    return no_data

# --- Plotting Function ---
def plot_roofline_figure(fig_ax, title, points_to_plot, xlim_min=0.01, xlim_max=1000):
    """
    Draw a log-log Roofline plot on an existing Matplotlib axes.

    The plot contains two horizontal compute roofs (PEAK_2D_PERFORMANCE_GFLOPS
    and PEAK_1D_PERFORMANCE_GFLOPS), a memory-bandwidth roof that rises from
    the left edge to the point where it meets the 2D compute roof, and one
    annotated marker per valid measurement.

    Args:
        fig_ax: Axes object to draw on; it is modified in place.
        title: Axes title.
        points_to_plot: Iterable of 6-tuples
            (label, operational_intensity, gflops, color, marker, extra).
            The sixth element is ignored. A point is skipped (with a printed
            message) when its intensity or performance is None, NaN,
            infinite, or not strictly positive, since such values cannot be
            placed on log axes.
        xlim_min: Lower x-axis limit (FLOPs/Byte).
        xlim_max: Upper x-axis limit (FLOPs/Byte).

    Returns:
        None.
    """
    ax = fig_ax
    ax.set_xscale('log')
    ax.set_yscale('log')
    ax.set_xlabel('Operational Intensity (FLOPs/Byte)')
    ax.set_ylabel('Performance (GFLOPs/s)')
    ax.set_title(title)
    ax.grid(True, which="both", ls="-", alpha=0.3)

    # Compute roofs.
    ax.axhline(PEAK_2D_PERFORMANCE_GFLOPS, ls='--', color='black', alpha=0.7, label=f'Peak 2D ({PEAK_2D_PERFORMANCE_GFLOPS:.0f} GFLOPs/s)')
    ax.axhline(PEAK_1D_PERFORMANCE_GFLOPS, ls=':', color='dimgray', alpha=0.7, label=f'Peak 1D ({PEAK_1D_PERFORMANCE_GFLOPS:.0f} GFLOPs/s)')

    # Memory roof: performance = intensity * bandwidth, drawn from the left
    # edge up to the intensity at which it reaches the 2D compute roof.
    oi_intersect_2d = PEAK_2D_PERFORMANCE_GFLOPS / (MEMORY_BANDWIDTH_B_S / 1e9)
    effective_xlim_min = max(xlim_min, 1e-3)  # keep the start strictly positive for the log axis
    if oi_intersect_2d > 0:
        oi_line_end = oi_intersect_2d
    else:
        # Degenerate roof (non-positive or NaN intersection): draw one decade.
        oi_line_end = effective_xlim_min * 10
    oi_values_for_mem_line = np.array([effective_xlim_min, oi_line_end])

    ax.plot(oi_values_for_mem_line, oi_values_for_mem_line * (MEMORY_BANDWIDTH_B_S / 1e9), ls='--', color='black', alpha=0.7, label=f'Mem BW ({MEMORY_BANDWIDTH_GB_S} GB/s)')

    plotted_labels = set()
    plotted_gflops = []
    for label, oi, gflops, color, marker, _ in points_to_plot:
        # Both coordinates must be finite and positive. Checking only
        # `oi <= 0` would let a NaN intensity through, because every
        # comparison with NaN is False.
        is_plottable = (
            oi is not None
            and gflops is not None
            and np.isfinite(oi)
            and np.isfinite(gflops)
            and oi > 0
            and gflops > 0
        )
        if not is_plottable:
            print(f"Skipping point {label} due to invalid OI/GFLOPs: OI={oi}, GFLOPs={gflops}")
            continue

        # Only the first point carrying a given label gets a legend entry.
        legend_label = label if label not in plotted_labels else None
        ax.plot(oi, gflops, marker=marker, color=color, markersize=7, label=legend_label, linestyle='None')

        # Annotate with the part of the label before any '(' and cap its length.
        text_label = label.split('(')[0].strip()
        if len(text_label) > 15:
            text_label = text_label[:12] + "..."
        ax.text(oi * 1.15, gflops * 0.95, text_label, fontsize=7, va='top', ha='left')

        if legend_label:
            plotted_labels.add(label)
        plotted_gflops.append(gflops)

    ax.set_xlim(xlim_min, xlim_max)
    # The lower y-limit is half the smallest drawn point, and never above
    # 0.01; the upper limit leaves headroom above the 2D compute roof.
    min_perf_on_plot = min(plotted_gflops) if plotted_gflops else 0.01
    ax.set_ylim(min(min_perf_on_plot * 0.5, 0.01), PEAK_2D_PERFORMANCE_GFLOPS * 1.5)

    if plotted_labels:
        ax.legend(fontsize='x-small', loc='lower right')

In [None]:
# --- Main Script Logic ---
if __name__ == "__main__":
    # Entry point: load FuseMax ('binding') attention simulation results from
    # one experiment directory and save two Roofline figures next to them.

    # !!! USER ACTION REQUIRED: Set this to your experiment name !!!
    # This is the name of the directory created under ../outputs/generated/
    # by your pythia.ipynb (or runpythia.py) execution.
    # For example, if your output path was ../outputs/generated/my_pythia_run_april27/
    # then USER_EXPERIMENT_NAME should be "my_pythia_run_april27"
    USER_EXPERIMENT_NAME = "YOUR_EXPERIMENT_DIRECTORY_NAME_HERE"  # E.g., "2025-04-27_14-36-20" or "my_custom_run"

    if USER_EXPERIMENT_NAME == "YOUR_EXPERIMENT_DIRECTORY_NAME_HERE":
        print("ERROR: Please set the USER_EXPERIMENT_NAME variable in the script to match your experiment output directory.")
        # Raise SystemExit directly: the `exit()` helper is injected by the
        # `site` module for interactive use, may be missing, and would report
        # success (status 0) for this error.
        raise SystemExit(1)

    # The results CSV is expected at <experiment>/results/attn-binding_all_scenarios.csv.
    # If your run produced a differently named file (for example a single-run
    # "attn-binding.csv" in the same "results" directory), change the file
    # name below accordingly.
    base_output_path = Path("../outputs/generated/") / USER_EXPERIMENT_NAME
    SIM_RESULTS_CSV_PATH = base_output_path / "results" / "attn-binding_all_scenarios.csv"

    output_figs_dir = base_output_path / "figs_roofline" # Save figs within the experiment directory
    output_figs_dir.mkdir(parents=True, exist_ok=True)

    print(f"Attempting to load simulation data from: {SIM_RESULTS_CSV_PATH}")

    # --- Data Preparation for FIGURE 1: BERT and OPT Components ---
    # Each point is (label, FLOPs/Byte, GFLOPs/s, color, marker, peak roof).
    points_fig1 = []

    # BERT-base, 1K sequence length (Prefill: M_query=1024, P_kv=1024)
    bert_attn_flops = calculate_fusemax_attn_flops_one_layer(
        B=BERT_BASE_PARAMS["B"], H=BERT_BASE_PARAMS["H"],
        M_query=1024, P_kv=1024,
        E_head=BERT_BASE_PARAMS["E"], F_head=BERT_BASE_PARAMS["F"],
        M0_spatial_dim=PE_2D_COLS
    )
    # Ensure these model_key and seq_key match EXACTLY what's in your CSV and cascade.py
    bert_attn_bytes, bert_attn_latency = get_sim_data(SIM_RESULTS_CSV_PATH, "BERT_Base_1K_Prefill", "1024ctx_1024q")
    # Truthiness check: skips both missing data (None) and zero traffic/latency.
    if bert_attn_bytes and bert_attn_latency:
        points_fig1.append(("BERT Attn (1K)", bert_attn_flops / bert_attn_bytes, (bert_attn_flops / bert_attn_latency) / 1e9, 'blue', 'o', PEAK_2D_PERFORMANCE_GFLOPS))

    # OPT-9B, 2K sequence length (Prefill: M_query=2048, P_kv=2048)
    opt_attn_flops = calculate_fusemax_attn_flops_one_layer(
        B=OPT_9B_PARAMS["B"], H=OPT_9B_PARAMS["H"],
        M_query=2048, P_kv=2048,
        E_head=OPT_9B_PARAMS["E"], F_head=OPT_9B_PARAMS["F"],
        M0_spatial_dim=PE_2D_COLS
    )
    opt_attn_bytes, opt_attn_latency = get_sim_data(SIM_RESULTS_CSV_PATH, "OPT_9B_2K_Prefill", "2048ctx_2048q")
    if opt_attn_bytes and opt_attn_latency:
        points_fig1.append(("OPT Attn (2K)", opt_attn_flops / opt_attn_bytes, (opt_attn_flops / opt_attn_latency) / 1e9, 'red', 'o', PEAK_2D_PERFORMANCE_GFLOPS))

    # Placeholders for MLP/Overall - requires data from linear layer simulations
    print("MLP and Overall points for Figure 1 are placeholders and require data from linear layer simulations.")

    fig1_plot, ax1_plot = plt.subplots(figsize=(10, 7))
    plot_roofline_figure(ax1_plot, "Figure 1: BERT & OPT Attention Roofline", points_fig1, xlim_min=0.1, xlim_max=2000)
    fig1_plot.tight_layout()
    fig1_plot.savefig(output_figs_dir / "roofline_fig1_bert_opt_attn.png")
    print(f"Saved Figure 1 to {output_figs_dir / 'roofline_fig1_bert_opt_attn.png'}")
    plt.close(fig1_plot)  # release the figure once it has been written

    # --- Data Preparation for FIGURE 2: OPT Model - Varying M=P (Attention only) ---
    points_fig2 = []
    opt_core_fig2 = OPT_9B_PARAMS

    # (ContextLength, Label_Suffix, model_key_for_cascade, seq_key_for_cascade)
    opt_attn_scenarios_fig2 = [
        (2048, "LC Prefill (2K)", "OPT_9B_LC_Prefill", "2048ctx_2048q"),
        (256,  "SC Prefill (256)", "OPT_9B_SC_Prefill", "256ctx_256q"),
    ]

    for context_len, label_suffix, model_key, seq_key in opt_attn_scenarios_fig2:
        # Prefill scenario: query and key/value lengths are both the context length.
        current_attn_flops = calculate_fusemax_attn_flops_one_layer(
            B=opt_core_fig2["B"], H=opt_core_fig2["H"],
            M_query=context_len, P_kv=context_len,
            E_head=opt_core_fig2["E"], F_head=opt_core_fig2["F"],
            M0_spatial_dim=PE_2D_COLS
        )
        bytes_val, latency_val = get_sim_data(SIM_RESULTS_CSV_PATH, model_key, seq_key)

        if bytes_val and latency_val:
            points_fig2.append((f"OPT Attn ({label_suffix})", current_attn_flops / bytes_val, (current_attn_flops / latency_val) / 1e9, 'purple', 'X', PEAK_2D_PERFORMANCE_GFLOPS))
        else:
            print(f"Skipping OPT Attn ({label_suffix}) due to missing data for keys: {model_key}, {seq_key}")

    fig2_plot, ax2_plot = plt.subplots(figsize=(10, 7))
    plot_roofline_figure(ax2_plot, "Figure 2: OPT Model Attention - Varying Context Length (M=P)", points_fig2, xlim_min=0.1, xlim_max=2000)
    fig2_plot.tight_layout()
    fig2_plot.savefig(output_figs_dir / "roofline_fig2_opt_varying_context.png")
    print(f"Saved Figure 2 to {output_figs_dir / 'roofline_fig2_opt_varying_context.png'}")
    plt.close(fig2_plot)

    print("Roofline plotting script finished.")

Error: CSV file not found at ../outputs/generated/all_quadrant_scenarios_test/results/attn-binding_all_scenarios.csv
Error: CSV file not found at ../outputs/generated/all_quadrant_scenarios_test/results/attn-binding_all_scenarios.csv
Saved Figure 1 to roofline_plots/roofline_fig1_bert_opt_components.png
Error: CSV file not found at ../outputs/generated/all_quadrant_scenarios_test/results/attn-binding_all_scenarios.csv
Skipping OPT Attn (LC Prefill) due to missing data for keys: OPT_9B_LC_Prefill, 2048ctx_2048q
Error: CSV file not found at ../outputs/generated/all_quadrant_scenarios_test/results/attn-binding_all_scenarios.csv
Skipping OPT Attn (SC Prefill) due to missing data for keys: OPT_9B_SC_Prefill, 256ctx_256q
Saved Figure 2 to roofline_plots/roofline_fig2_opt_varying_context.png
Roofline plotting script finished.
