In [None]:
# Install the packages below with pip, invoked through the interpreter that is
# running this notebook (sys.executable), so they land in the kernel's own
# environment rather than whichever `pip` happens to be first on PATH.
import sys
!{sys.executable} -m pip install pandas
!{sys.executable} -m pip install plotly
!{sys.executable} -m pip install kaleido
!{sys.executable} -m pip install numpy
!{sys.executable} -m pip install pyopencl
!{sys.executable} -m pip install nbformat


In [2]:

# Imports used by the plotting and image-export helpers below
import numpy as np
import pandas as pd  # Add pandas import
import plotly.express as px
from plotly.subplots import make_subplots
import os
import plotly.graph_objects as g

In [16]:


def _dtype_label(csv_path: str) -> str:
    """Return the data-type tag of a ``benchmarks_<dtype>.csv`` style path."""
    # Only look at the file name so that '_' or '.' in a directory name cannot
    # be mistaken for the separator in front of the dtype tag.
    file_name = os.path.basename(csv_path)
    parts = file_name.split('_')
    return (parts[1] if len(parts) > 1 else file_name).split('.')[0]


def _throughput_gb_per_s(df, dtype_sizes):
    """Return a Series with the sort throughput of every row of ``df`` in GB/s."""
    element_bytes = df['data_type'].map(dtype_sizes)
    missing = element_bytes.isna()
    if missing.any():
        unknown = sorted(set(df.loc[missing, 'data_type'].astype(str)))
        raise KeyError(f'no element size known for data_type(s): {unknown}')

    total_bytes = df['dataset_size'] * element_bytes
    seconds = df['microseconds'] * 1e-6
    # bytes / second, scaled to gigabytes (1 GB = 1e9 bytes)
    return total_bytes / seconds / 1e9


def visualize(csv_paths: list[str], log_y: bool = False):
    """Plot radix-sort throughput of the cub, futhark and our implementations.

    Every CSV in ``csv_paths`` gets its own subplot in a two-column grid: 2x2
    for up to four files, with additional rows appended when more are given.

    Args:
        csv_paths: Benchmark CSV files named like ``benchmarks_<dtype>.csv``.
            Each file must provide the columns ``impl``, ``data_type``,
            ``dataset_size`` and ``microseconds``.
        log_y: Use a logarithmic throughput axis when true, linear otherwise.

    Returns:
        The assembled plotly figure.

    Raises:
        KeyError: If a CSV contains a ``data_type`` other than u8/u16/u32/u64.
    """
    color_map = {
        'futhark': '#7B68EE',  # Medium slate blue
        'our': '#FF6B6B',      # Coral red
        'cub': '#4CAF50',      # Green
    }

    # Size in bytes of one element of each benchmarked data type
    dtype_sizes = {
        'u8': 1,
        'u16': 2,
        'u32': 4,
        'u64': 8,
    }

    n_cols = 2
    # Keep the original 2x2 layout as the minimum; grow only for extra files.
    n_rows = max(2, (len(csv_paths) + n_cols - 1) // n_cols)

    fig = make_subplots(
        rows=n_rows,
        cols=n_cols,
        subplot_titles=[f'Data type: {_dtype_label(path)}' for path in csv_paths]
    )

    for i, csv_path in enumerate(csv_paths):
        # Fill the grid row by row (1-based indices, as plotly expects)
        row = (i // n_cols) + 1
        col = (i % n_cols) + 1

        df = pd.read_csv(csv_path)

        # futhark outputs milliseconds not microseconds so we need to adjust
        df.loc[df['impl'] == 'futhark', 'microseconds'] *= 1000

        df['GB/s'] = _throughput_gb_per_s(df, dtype_sizes)

        # One trace per (implementation, data type) pair present in the file
        for impl in df['impl'].unique():
            for dtype in df['data_type'].unique():
                mask = (df['impl'] == impl) & (df['data_type'] == dtype)
                if not mask.any():
                    # Avoid empty traces cluttering the legend
                    continue
                name = f"{impl} ({dtype})"

                fig.add_trace(
                    g.Scatter(
                        x=df.loc[mask, 'dataset_size'],
                        y=df.loc[mask, 'GB/s'],
                        mode='lines+markers',
                        name=name,
                        legendgroup=name,
                        # Only the first subplot contributes legend entries
                        showlegend=(i == 0),
                        # Unknown implementations fall back to plotly's default colour
                        line=dict(color=color_map.get(impl))
                    ),
                    row=row,
                    col=col
                )

    fig.update_layout(
        title=f'Radix Sort Performance Comparison {"(logged)" if log_y else ""}',
        height=400 * n_rows,  # 800 for the default two rows
        width=1000,
        showlegend=True,
        hovermode='x unified'
    )

    # Without row/col these apply to the axes of every subplot
    fig.update_xaxes(title='Dataset Size', type='log')
    fig.update_yaxes(
        title='Throughput (GB/s)',
        type='log' if log_y else 'linear'
    )

    return fig
# NOTE: the list of benchmark CSV files (`files`) is defined further down, next to the main execution.


def save_fig(fig: g.Figure, name: str):
    """Write ``fig`` to ``report/images/<name>.png`` one level above the cwd.

    The working directory is left untouched: the target is addressed relative
    to the parent directory instead of changing into it, so an interrupted or
    failed export cannot leave the notebook kernel in the wrong directory.

    Args:
        fig: The plotly figure to export.
        name: File name of the image, without the ``.png`` extension.

    Export is best-effort: any failure is printed and not re-raised.
    """
    # The notebook runs from src/, while the report lives at the repository root
    image_dir = os.path.join(os.pardir, 'report', 'images')
    try:
        os.makedirs(image_dir, exist_ok=True)
        fig.write_image(os.path.join(image_dir, f'{name}.png'))
    except Exception as e:
        print(f"Error writing image: {e}")


# One benchmark CSV per data type; each becomes a subplot of the combined figure
files = [
    'benchmarks_u8.csv',
    'benchmarks_u16.csv',
    'benchmarks_u32.csv',
    'benchmarks_u64.csv',
]

# Build the combined figure twice (log and linear throughput axis), then
# export both as PNG images.
fig1 = visualize(csv_paths=files, log_y=True)
fig2 = visualize(csv_paths=files, log_y=False)
save_fig(fig=fig1, name='combined_logged_benchmarks')
save_fig(fig=fig2, name='combined_benchmarks')

