# cuCascade Benchmark Results Visualization

This notebook visualizes benchmark results from Google Benchmark output (gbench_results.json) using Plotly.


In [None]:
import json
import pandas as pd
import plotly.graph_objects as go
import plotly.express as px
from plotly.subplots import make_subplots
import numpy as np
from pathlib import Path


## Load Benchmark Data


In [None]:
# Load the benchmark report (path is relative to the notebook's working directory)
json_file = 'gbench_results.json'

# JSON text is UTF-8; pin the encoding so decoding does not depend on the
# platform's locale default (which is not UTF-8 on every system).
with open(json_file, 'r', encoding='utf-8') as f:
    data = json.load(f)

# The per-benchmark records live under the top-level "benchmarks" key
benchmarks = data['benchmarks']

# Convert the records to a DataFrame for the analysis cells below
df = pd.DataFrame(benchmarks)

print(f"Loaded {len(df)} benchmark results")
print(f"\nColumns: {list(df.columns)}")
df.head()


## Data Preprocessing


In [None]:
# Extract benchmark name components
def parse_benchmark_name(name):
    """Split a benchmark name on '/' into a base name and parameter list.

    Args:
        name: Benchmark name string, e.g. ``'BM_Foo/1024/8'``.

    Returns:
        A ``(base_name, params)`` tuple. ``base_name`` is the text before the
        first '/'; ``params`` is the list of the remaining '/'-separated
        segments, empty when the name contains no '/'.
    """
    # str.split always yields at least one element, so the starred target
    # simply collects whatever follows the first segment (possibly nothing).
    base_name, *params = name.split('/')
    return base_name, params

# Parse every benchmark name once and fan the pieces out into two columns.
# Explicit object Series (rather than unpacking zip(*...)) keep this working
# when the DataFrame has zero rows and keep each params list in a single cell.
parsed_names = [parse_benchmark_name(name) for name in df['name']]
df['base_name'] = pd.Series([base for base, _ in parsed_names], index=df.index, dtype=object)
df['params'] = pd.Series([params for _, params in parsed_names], index=df.index, dtype=object)

# Calculate throughput in GB/s (divisor is 1024**3, i.e. binary gigabytes).
# The column is always created so the preview below and any later plotting
# code can rely on it; it is NaN when the report has no bytes_per_second.
if 'bytes_per_second' in df.columns:
    df['throughput_GBs'] = df['bytes_per_second'] / (1024**3)
else:
    df['throughput_GBs'] = np.nan

# Convert real_time to milliseconds using each row's own time_unit.
# Reports may use ns (the Google Benchmark default), us, ms or s, and the unit
# can differ per benchmark, so looking only at the first row is not enough.
# Units not listed here map to NaN instead of a silently wrong number.
units_per_ms = {'ns': 1_000_000, 'us': 1_000, 'ms': 1, 's': 0.001}
if 'real_time' in df.columns and 'time_unit' in df.columns:
    df['time_ms'] = df['real_time'] / df['time_unit'].map(units_per_ms)
else:
    df['time_ms'] = np.nan

print(f"\nUnique benchmark types: {df['base_name'].unique()}")
df[['name', 'base_name', 'throughput_GBs', 'time_ms']].head(10)


## Throughput Benchmarks Visualization


In [None]:
# Keep only the rows whose base name mentions "Throughput"
is_throughput = df['base_name'].str.contains('Throughput', na=False)
throughput_benchmarks = df[is_throughput].copy()

if throughput_benchmarks.empty:
    print("No throughput benchmarks found")
else:
    # The x-axis value is taken from the 'MB' column of the results
    throughput_benchmarks['size_MB'] = throughput_benchmarks['MB']

    fig = go.Figure()

    # One trace per benchmark, in order of first appearance, with points
    # ordered by data size so the connecting line runs left to right.
    for bench_name, bench_data in throughput_benchmarks.groupby('base_name', sort=False):
        bench_data = bench_data.sort_values('size_MB')
        # Strip the boilerplate parts of the name for a shorter legend entry
        trace_label = bench_name.replace('BM_', '').replace('Throughput', '')

        fig.add_trace(
            go.Scatter(
                x=bench_data['size_MB'],
                y=bench_data['throughput_GBs'],
                mode='lines+markers',
                name=trace_label,
                line=dict(width=2),
                marker=dict(size=8),
            )
        )

    # Logarithmic x-axis; unified hover shows all traces at the same x value
    layout_options = dict(
        title='Memory Transfer Throughput vs Data Size',
        xaxis_title='Data Size (MB)',
        yaxis_title='Throughput (GB/s)',
        xaxis_type='log',
        template='plotly_white',
        hovermode='x unified',
        width=1000,
        height=600,
        font=dict(size=12),
    )
    fig.update_layout(**layout_options)

    fig.show()


## Conversion Benchmarks Visualization


In [None]:
# Filter conversion benchmarks (but not throughput)
conversion_benchmarks = df[
    df['base_name'].str.contains('Convert|Roundtrip', na=False) &
    ~df['base_name'].str.contains('Throughput', na=False)
].copy()

# Columns the plots below read. Checking them up front gives a readable
# message instead of a KeyError when the report does not contain them.
required_columns = ['rows', 'columns', 'bytes', 'throughput_GBs']
missing_columns = [col for col in required_columns if col not in conversion_benchmarks.columns]

if conversion_benchmarks.empty:
    print("No conversion benchmarks found")
elif missing_columns:
    print(f"Cannot plot conversion benchmarks; missing columns: {missing_columns}")
else:
    # Copy the counters under descriptive names and express the size in MB
    conversion_benchmarks['num_rows'] = conversion_benchmarks['rows']
    conversion_benchmarks['num_columns'] = conversion_benchmarks['columns']
    conversion_benchmarks['size_MB'] = conversion_benchmarks['bytes'] / (1024 * 1024)

    # Marker symbols and colors are cycled per distinct column count
    marker_symbols = ['circle', 'square', 'diamond', 'cross', 'x', 'triangle-up',
                      'triangle-down', 'star', 'hexagon', 'pentagon', 'octagon']
    colors = px.colors.qualitative.Plotly

    # One figure per benchmark type
    unique_benchmarks = conversion_benchmarks['base_name'].unique()

    for bench_name in unique_benchmarks:
        bench_data = conversion_benchmarks[conversion_benchmarks['base_name'] == bench_name]

        # Rows without a column count cannot be assigned to a series: NaN never
        # compares equal to itself and int(NaN) raises, so drop them here.
        bench_data = bench_data.dropna(subset=['num_columns']).copy()
        if bench_data.empty:
            print(f"Skipping {bench_name}: no rows with a column count")
            continue

        fig = go.Figure()

        # One trace per distinct column count, in ascending order
        unique_cols = sorted(bench_data['num_columns'].unique())

        for i, num_cols in enumerate(unique_cols):
            col_data = bench_data[bench_data['num_columns'] == num_cols].sort_values('size_MB')

            fig.add_trace(go.Scatter(
                x=col_data['size_MB'],
                y=col_data['throughput_GBs'],
                mode='markers+lines',
                name=f'{int(num_cols)} columns',
                marker=dict(
                    size=10,
                    # Modulo keeps the lookup valid when there are more
                    # column counts than symbols or colors
                    symbol=marker_symbols[i % len(marker_symbols)],
                    color=colors[i % len(colors)],
                    line=dict(width=1, color='white')
                ),
                line=dict(width=1.5)
            ))

        fig.update_layout(
            title=f'{bench_name.replace("BM_", "")} - Throughput vs Data Size',
            xaxis_title='Data Size (MB)',
            yaxis_title='Throughput (GB/s)',
            xaxis_type='log',
            template='plotly_white',
            width=1000,
            height=600,
            hovermode='closest',
            legend=dict(title='Number of Columns')
        )

        fig.show()
