# cuCascade Benchmark Results Visualization

This notebook visualizes benchmark results from Google Benchmark output (gbench_results.json) using Plotly.


In [None]:
import json
import pandas as pd
import plotly.graph_objects as go
import plotly.express as px
from plotly.subplots import make_subplots
import numpy as np
from pathlib import Path


## Load Benchmark Data


In [None]:
# Path of the benchmark report to visualize
json_file = 'gbench_results.json'

# Read and decode the whole report
with Path(json_file).open('r') as report:
    data = json.load(report)

# The report's 'benchmarks' entry holds the individual result records
benchmarks = data['benchmarks']

# Tabulate the records: one row per result
df = pd.DataFrame(benchmarks)

print(f"Loaded {len(df)} benchmark results")
print(f"\nColumns: {list(df.columns)}")
df.head()


## Data Preprocessing


In [None]:
# Extract benchmark name components
def parse_benchmark_name(name):
    """Split a benchmark name on '/' into its test name and parameter segments.

    Returns:
        A ``(base_name, params)`` tuple. ``base_name`` is the text before the
        first '/'; ``params`` is the list of remaining '/'-separated segments
        (empty when the name contains no '/').
    """
    # str.split always yields at least one element, so the head is always bound
    base_name, *params = name.split('/')
    return base_name, params

# Split every benchmark name into its base name and '/'-separated parameters.
# Plain lists are used (rather than zip(*...) unpacking) so that an empty
# results table yields two empty columns instead of an unpacking error.
parsed_names = [parse_benchmark_name(name) for name in df['name']]
df['base_name'] = [base_name for base_name, _ in parsed_names]
df['params'] = [params for _, params in parsed_names]

# Thread count is read from the report's 'threads' column; when the column is
# absent every result is treated as single-threaded.
if 'threads' not in df.columns:
    df['threads'] = 1

# Throughput in GB/s, where 1 GB is taken as 1024**3 bytes
if 'bytes_per_second' in df.columns:
    df['throughput_GBs'] = df['bytes_per_second'] / (1024**3)

# Convert real_time to milliseconds using each row's own time_unit.
# Dividing everything that is not 'ms' by 1000 is only correct for 'us';
# 'ns' and 's' need their own factors, and rows may not all share one unit.
if 'real_time' in df.columns:
    units_per_ms = {'ns': 1e6, 'us': 1e3, 'ms': 1.0, 's': 1e-3}
    if 'time_unit' in df.columns:
        # NOTE(review): a missing unit is assumed to be nanoseconds, the
        # Google Benchmark default - confirm against the producing run.
        time_units = df['time_unit'].fillna('ns')
    else:
        time_units = pd.Series('ns', index=df.index)
    # Unrecognized units map to NaN rather than a silently wrong number
    df['time_ms'] = df['real_time'] / time_units.map(units_per_ms)

print(f"\nUnique benchmark types: {df['base_name'].unique()}")
print(f"Thread counts: {sorted(df['threads'].unique())}")

# Preview only the columns that were actually produced above, so a report
# without bytes_per_second or real_time does not raise a KeyError here.
preview_columns = [
    column
    for column in ('name', 'base_name', 'threads', 'throughput_GBs', 'time_ms')
    if column in df.columns
]
df[preview_columns].head(10)


## Split and Prepare Throughput and Conversion Benchmarks


In [None]:
# Classify rows by base name: anything mentioning 'Throughput' is a throughput
# benchmark; 'Convert' rows that are not throughput rows are conversion benchmarks.
is_throughput = df['base_name'].str.contains('Throughput', na=False)
is_convert = df['base_name'].str.contains('Convert', na=False)

throughput_benchmarks = df[is_throughput].copy()
conversion_benchmarks = df[is_convert & ~is_throughput].copy()

# Throughput rows: the 'MB' column, when present, is used directly as the size
if 'MB' in throughput_benchmarks.columns and not throughput_benchmarks.empty:
    throughput_benchmarks['size_MB'] = throughput_benchmarks['MB']

# Conversion rows: derive column count and size from the first available source
if not conversion_benchmarks.empty:
    BYTES_PER_MB = 1024 * 1024
    available = conversion_benchmarks.columns

    # Column count: prefer 'columns', fall back to 'param1'
    count_source = next((c for c in ('columns', 'param1') if c in available), None)
    if count_source is not None:
        conversion_benchmarks['num_columns'] = conversion_benchmarks[count_source]

    # Size in MB: prefer 'bytes', fall back to 'param0' (treated as total bytes)
    bytes_source = next((c for c in ('bytes', 'param0') if c in available), None)
    if bytes_source is not None:
        conversion_benchmarks['size_MB'] = conversion_benchmarks[bytes_source] / BYTES_PER_MB

print(f"Throughput benchmarks: {len(throughput_benchmarks)}")
print(f"Conversion benchmarks: {len(conversion_benchmarks)}")


## Performance Analysis by Thread Count

Create separate plots for each thread count. Each plot shows HostToGpu (left) and GpuToHost (right) with both raw and conversion results combined.


In [None]:
# Get unique thread counts
thread_counts = sorted(df['threads'].unique())

print(f"Creating plots for thread counts: {thread_counts}")

# Keep only the GpuToHost / HostToGpu benchmarks; these are the two panels drawn below
filtered_throughput = throughput_benchmarks[
    throughput_benchmarks['base_name'].str.contains('GpuToHost|HostToGpu', na=False)
].copy()

filtered_conversion = conversion_benchmarks[
    conversion_benchmarks['base_name'].str.contains('GpuToHost|HostToGpu', na=False)
].copy()

# Styling shared by every figure; defined once instead of on each loop iteration
marker_symbols = ['circle', 'square', 'diamond', 'cross', 'x', 'triangle-up',
                  'triangle-down', 'star', 'hexagon', 'pentagon', 'octagon']
colors_raw = px.colors.qualitative.Set1
colors_conv = px.colors.qualitative.Plotly


def _has_columns(frame, required):
    """Return True when `frame` has rows and contains every column in `required`."""
    return not frame.empty and all(column in frame.columns for column in required)


def _add_direction_traces(fig, raw_rows, conv_rows, direction, subplot_col,
                          legendgroup, raw_color):
    """Add the raw and conversion traces of one transfer direction to `fig`.

    Args:
        fig: Figure created with make_subplots (1 row, 2 columns).
        raw_rows: Throughput benchmark rows for the current thread count.
        conv_rows: Conversion benchmark rows for the current thread count.
        direction: Substring of base_name selecting the direction
            ('HostToGpu' or 'GpuToHost'); also used as the legend group title.
        subplot_col: 1-based subplot column that receives the traces.
        legendgroup: Legend group id shared by all traces of this direction.
        raw_color: Line color of the raw throughput trace.

    Returns:
        List of the non-NaN throughput values that were plotted, for use in
        computing a common y-axis range.
    """
    plotted = []

    # Raw throughput: a single line over data size. Skipped (instead of raising
    # KeyError) when the size or throughput column was never produced.
    raw = raw_rows[raw_rows['base_name'].str.contains(direction, na=False)]
    if _has_columns(raw, ('size_MB', 'throughput_GBs')):
        raw_sorted = raw.sort_values('size_MB')
        plotted.extend(raw_sorted['throughput_GBs'].dropna().tolist())

        fig.add_trace(
            go.Scatter(
                x=raw_sorted['size_MB'],
                y=raw_sorted['throughput_GBs'],
                mode='lines+markers',
                name='Raw',
                line=dict(width=3, color=raw_color),
                marker=dict(size=10),
                legendgroup=legendgroup,
                legendgrouptitle_text=direction,
                showlegend=True
            ),
            row=1, col=subplot_col
        )

    # Conversion benchmarks: one trace per distinct column count
    conv = conv_rows[conv_rows['base_name'].str.contains(direction, na=False)]
    if _has_columns(conv, ('size_MB', 'num_columns', 'throughput_GBs')):
        # dropna: a missing column count cannot be sorted reliably or cast to int
        unique_cols = sorted(conv['num_columns'].dropna().unique())

        for col_idx, num_cols in enumerate(unique_cols):
            col_data = conv[conv['num_columns'] == num_cols].sort_values('size_MB')
            plotted.extend(col_data['throughput_GBs'].dropna().tolist())

            fig.add_trace(
                go.Scatter(
                    x=col_data['size_MB'],
                    y=col_data['throughput_GBs'],
                    mode='markers+lines',
                    name=f"Convert ({int(num_cols)}col)",
                    marker=dict(
                        size=8,
                        # Cycle through the palettes when there are more column
                        # counts than symbols / colors
                        symbol=marker_symbols[col_idx % len(marker_symbols)],
                        color=colors_conv[col_idx % len(colors_conv)],
                        line=dict(width=1, color='white')
                    ),
                    line=dict(width=1.5),
                    legendgroup=legendgroup,
                    showlegend=True
                ),
                row=1, col=subplot_col
            )

    return plotted


# Create separate plot for each thread count
for thread_count in thread_counts:
    # Filter data for this thread count
    thread_throughput = filtered_throughput[filtered_throughput['threads'] == thread_count]
    thread_conversion = filtered_conversion[filtered_conversion['threads'] == thread_count]

    # One row, two panels: HostToGpu on the left, GpuToHost on the right
    fig = make_subplots(
        rows=1, cols=2,
        subplot_titles=('HostToGpu', 'GpuToHost'),
        horizontal_spacing=0.12,
        shared_yaxes=True
    )

    # Throughput values from both panels, used for a common y-axis range
    all_throughputs = []
    all_throughputs += _add_direction_traces(
        fig, thread_throughput, thread_conversion,
        direction='HostToGpu', subplot_col=1, legendgroup='h2g', raw_color=colors_raw[1]
    )
    all_throughputs += _add_direction_traces(
        fig, thread_throughput, thread_conversion,
        direction='GpuToHost', subplot_col=2, legendgroup='g2h', raw_color=colors_raw[0]
    )

    # Pad the y-axis range by 10% on each side; fall back to a fixed range
    # when nothing was plotted
    if all_throughputs:
        y_min = min(all_throughputs) * 0.9
        y_max = max(all_throughputs) * 1.1
    else:
        y_min, y_max = 0, 100

    # Update axes: logarithmic size axis, identical y range on both panels
    fig.update_xaxes(title_text="Data Size (MB)", type="log", row=1, col=1)
    fig.update_xaxes(title_text="Data Size (MB)", type="log", row=1, col=2)
    fig.update_yaxes(title_text="Throughput (GB/s)", range=[y_min, y_max], row=1, col=1)
    fig.update_yaxes(range=[y_min, y_max], row=1, col=2)

    # Update layout
    thread_label = f"{thread_count} thread" if thread_count == 1 else f"{thread_count} threads"
    fig.update_layout(
        title_text=f"Performance with {thread_label}: HostToGpu vs GpuToHost",
        template='plotly_white',
        width=1600,
        height=600,
        hovermode='closest',
        legend=dict(
            orientation="v",
            yanchor="top",
            y=0.98,
            xanchor="left",
            x=1.01,
            font=dict(size=10),
            tracegroupgap=20
        )
    )

    fig.show()
