In [1]:
import pandas as pd

In [2]:
def calculate_speedup_and_efficiency(df: pd.DataFrame, num_threads_column: str = 'num_threads', duration_column: str = 'save_duration_seconds') -> pd.DataFrame:
    """
    Calculates speedup and efficiency for a DataFrame of benchmark results.

    Assumes the DataFrame contains results for different numbers of threads,
    including a run with 1 thread, which serves as the baseline.

    Args:
        df (pd.DataFrame): The input DataFrame containing benchmark results.
                          Must have columns for thread count and duration.
        num_threads_column (str): The name of the column containing the number of threads.
        duration_column (str): The name of the column containing the duration (runtime) in seconds.

    Returns:
        pd.DataFrame: A new DataFrame with 'speedup' and 'efficiency' columns added,
                      or an empty DataFrame if a single-thread baseline is not found.
    """
    if df.empty:
        print("Warning: Input DataFrame is empty.")
        return pd.DataFrame()

    # Get the baseline runtime (1 thread)
    baseline_row = df[df[num_threads_column] == 1]
    if baseline_row.empty:
        print(f"Error: Baseline (1 {num_threads_column}) not found in the DataFrame. Cannot calculate speedup.")
        return pd.DataFrame()

    baseline_runtime = baseline_row[duration_column].iloc[0]

    if baseline_runtime == 0:
        print("Warning: Baseline runtime is zero. Speedup and efficiency calculations will result in division by zero.")
        return pd.DataFrame()

    # Calculate Speedup
    # Speedup = Runtime (1 thread) / Runtime (P threads)
    df['speedup'] = baseline_runtime / df[duration_column]

    # Calculate Efficiency
    # Efficiency = Speedup / Number of threads
    df['efficiency'] = df['speedup'] / df[num_threads_column]

    return df

In [3]:
def extract_efficiency_and_speedup_at_threads(dfs: list[pd.DataFrame], threads = 2):
    extracted_data = []
    for i, df in enumerate(dfs):
        df_at_threads = df[df['num_threads'] == threads]
        efficiency_val = df_at_threads['efficiency'].iloc[0]
        value_size_val = df_at_threads['value_size'].iloc[0]
        speedup_val = df_at_threads['speedup'].iloc[0]

        extracted_data.append({'value_size': value_size_val, f'efficiency_at_{threads}_threads': efficiency_val, f'speedup_at_{threads}_threads': speedup_val })

    result_df = pd.DataFrame(extracted_data)
    result_df = result_df.sort_values(by='value_size').reset_index(drop=True)
    
    return result_df

In [4]:
import pandas as pd
import plotly.graph_objects as go
from plotly.subplots import make_subplots

def plot_workload_performance(df_metrics: pd.DataFrame, threads: int = 2):
    """
    Generates an interactive Plotly graph showing efficiency and speedup
    vs. value_size for a specific number of threads.

    Args:
        df_metrics (pd.DataFrame): DataFrame containing 'value_size',
                                   'efficiency_at_{threads}_threads',
                                   and 'speedup_at_{threads}_threads' columns.
                                   This DataFrame is typically generated by
                                   extract_efficiency_and_speedup_at_threads.
        threads (int): The number of threads for which the data is being plotted.
    """
    if df_metrics.empty:
        print("Error: Input DataFrame is empty. Cannot generate plot.")
        return

    # Check for required columns
    required_efficiency_col = f'efficiency_at_{threads}_threads'
    required_speedup_col = f'speedup_at_{threads}_threads'
    required_cols = ['value_size', required_efficiency_col, required_speedup_col]

    if not all(col in df_metrics.columns for col in required_cols):
        print(f"Error: DataFrame must contain '{required_cols}' columns for plotting.")
        return

    # Define fixed y-axis ranges for consistent comparison
    # Adjust these ranges based on your expected min/max values across all datasets
    # For speedup, it typically starts at 1.0 (baseline) and goes up.
    # For efficiency, it's typically between 0.0 and 1.0 (or slightly above 1.0 for superlinear).
    speedup_y_range = [0.0, 5.0]  # Example: from 0x to 5x speedup
    efficiency_y_range = [0.0, 1.5] # Example: from 0% to 150% efficiency (to accommodate superlinear)

    # Create subplots: one for Speedup, one for Efficiency
    fig = make_subplots(rows=1, cols=2,
                        subplot_titles=(f'Speedup at {threads} Threads vs. Value Size',
                                        f'Efficiency at {threads} Threads vs. Value Size'))

    # Plot Speedup
    fig.add_trace(
        go.Scatter(
            x=df_metrics['value_size'],
            y=df_metrics[required_speedup_col],
            mode='lines+markers',
            name=f'Speedup at {threads} Threads',
            marker=dict(symbol='circle', size=8),
            line=dict(width=2)
        ),
        row=1, col=1
    )
    fig.add_trace(
        go.Scatter(
            x=df_metrics['value_size'],
            y=[1] * len(df_metrics), # Baseline for speedup
            mode='lines',
            name='Ideal (1x Speedup)',
            line=dict(dash='dash', color='gray'),
            showlegend=True
        ),
        row=1, col=1
    )

    # Plot Efficiency
    fig.add_trace(
        go.Scatter(
            x=df_metrics['value_size'],
            y=df_metrics[required_efficiency_col],
            mode='lines+markers',
            name=f'Efficiency at {threads} Threads',
            marker=dict(symbol='diamond', size=8),
            line=dict(width=2)
        ),
        row=1, col=2
    )
    fig.add_trace(
        go.Scatter(
            x=df_metrics['value_size'],
            y=[1] * len(df_metrics), # Ideal efficiency
            mode='lines',
            name='Ideal Efficiency (100%)',
            line=dict(dash='dash', color='gray'),
            showlegend=True
        ),
        row=1, col=2
    )
    fig.add_trace(
        go.Scatter(
            x=df_metrics['value_size'],
            y=[0.5] * len(df_metrics), # 50% efficiency reference
            mode='lines',
            name='50% Efficiency',
            line=dict(dash='dot', color='orange'),
            showlegend=True
        ),
        row=1, col=2
    )

    # Update layout for a beautiful graph
    fig.update_layout(
        title_text=f'Performance Scaling with Workload (at {threads} Threads)',
        height=600, width=1200,
        showlegend=True,
        hovermode="x unified", # Shows hover info for all traces at x-position
        template="plotly_white", # Clean white background
        title_x=0.5, # Center the main title
    )

    # Update x-axes to be consistent and descriptive
    fig.update_xaxes(
        title_text='Value Size (Bytes)',
        type='log', # Use log scale if value_size spans orders of magnitude
        tickmode='array',
        tickvals=df_metrics['value_size'].tolist(), # Set specific tick values
        ticktext=[f'{s}B' for s in df_metrics['value_size'].tolist()], # Add 'B' for bytes
        row=1, col=1
    )
    fig.update_xaxes(
        title_text='Value Size (Bytes)',
        type='log', # Use log scale if value_size spans orders of magnitude
        tickmode='array',
        tickvals=df_metrics['value_size'].tolist(),
        ticktext=[f'{s}B' for s in df_metrics['value_size'].tolist()],
        row=1, col=2
    )

    # Update y-axes for clarity and consistent ranges
    fig.update_yaxes(title_text='Speedup (X times faster)', range=speedup_y_range, row=1, col=1)
    fig.update_yaxes(title_text='Efficiency (Fraction of Ideal)', range=efficiency_y_range, row=1, col=2)

    fig.show()
    # You can also save it as an HTML file for interactive viewing later:
    # fig.write_html(f"performance_scaling_at_{threads}_threads.html")

In [5]:
import pandas as pd
import plotly.graph_objects as go
from plotly.subplots import make_subplots

def plot_single_df_performance(df: pd.DataFrame, workload_description: str = "Performance Metrics"):
    """
    Generates an interactive Plotly graph showing speedup and efficiency
    vs. number of threads for a single DataFrame, with consistent y-axis ranges.

    Args:
        df (pd.DataFrame): The input DataFrame containing 'num_threads', 'speedup',
                           and 'efficiency' columns.
        workload_description (str): A descriptive string for the plot title,
                                    e.g., "Value Size 50 Bytes, No Compression".
    """
    if df.empty or 'num_threads' not in df.columns or 'speedup' not in df.columns or 'efficiency' not in df.columns:
        print("Error: DataFrame must contain 'num_threads', 'speedup', and 'efficiency' columns for plotting.")
        return

    # Define fixed y-axis ranges for consistent comparison across all your plots
    # IMPORTANT: Adjust these ranges based on the MIN and MAX values you expect
    # across ALL your benchmark datasets for both speedup and efficiency.
    speedup_y_range = [0.0, 5.0]  # Example: from 0x to 5x speedup
    efficiency_y_range = [0.0, 1.5] # Example: from 0% to 150% efficiency (to accommodate superlinear)

    # Create subplots: one for Speedup, one for Efficiency
    fig = make_subplots(rows=1, cols=2,
                        subplot_titles=(f'Speedup', f'Efficiency'))

    # Plot Speedup
    fig.add_trace(
        go.Scatter(
            x=df['num_threads'],
            y=df['speedup'],
            mode='lines+markers',
            name='Speedup',
            marker=dict(symbol='circle', size=8),
            line=dict(width=2, color='royalblue')
        ),
        row=1, col=1
    )
    fig.add_trace(
        go.Scatter(
            x=df['num_threads'],
            y=[1] * len(df), # Baseline for speedup
            mode='lines',
            name='Ideal Speedup',
            line=dict(dash='dash', color='gray'),
            showlegend=True
        ),
        row=1, col=1
    )

    # Plot Efficiency
    fig.add_trace(
        go.Scatter(
            x=df['num_threads'],
            y=df['efficiency'],
            mode='lines+markers',
            name='Efficiency',
            marker=dict(symbol='diamond', size=8),
            line=dict(width=2, color='firebrick')
        ),
        row=1, col=2
    )
    fig.add_trace(
        go.Scatter(
            x=df['num_threads'],
            y=[1] * len(df), # Ideal efficiency
            mode='lines',
            name='Ideal Efficiency (100%)',
            line=dict(dash='dash', color='gray'),
            showlegend=True
        ),
        row=1, col=2
    )
    fig.add_trace(
        go.Scatter(
            x=df['num_threads'],
            y=[0.5] * len(df), # 50% efficiency reference
            mode='lines',
            name='50% Efficiency',
            line=dict(dash='dot', color='orange'),
            showlegend=True
        ),
        row=1, col=2
    )

    # Update layout for a beautiful graph
    fig.update_layout(
        title_text=f'{workload_description} vs. Number of Threads',
        height=600, width=1200,
        showlegend=True,
        hovermode="x unified", # Shows hover info for all traces at x-position
        template="plotly_white", # Clean white background
        title_x=0.5, # Center the main title
    )

    # Update x-axes
    fig.update_xaxes(
        title_text='Number of Threads',
        tickmode='array',
        tickvals=df['num_threads'].unique(), # Ensure ticks match thread counts
        row=1, col=1
    )
    fig.update_xaxes(
        title_text='Number of Threads',
        tickmode='array',
        tickvals=df['num_threads'].unique(),
        row=1, col=2
    )

    # Update y-axes for clarity and consistent ranges
    # The key change for consistent axes is setting the 'range' property
    fig.update_yaxes(title_text='Speedup (X times faster)', range=speedup_y_range, row=1, col=1)
    fig.update_yaxes(title_text='Efficiency (Fraction of Ideal)', range=efficiency_y_range, row=1, col=2)

    fig.show()
    # You can also save it as an HTML file for interactive viewing later:
    # fig.write_html(f"performance_metrics_{workload_description.replace(' ', '_').replace('.', '')}.html")

In [6]:
import pandas as pd
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import plotly.express as px # Import plotly.express for its color sequences

def plot_combined_workload_performance(dfs: list[pd.DataFrame], workload_type_description: str = "Overall Performance"):
    """
    Generates an interactive Plotly graph showing speedup and efficiency
    vs. value_size for multiple thread counts on the same chart, with distinct colors.

    Args:
        dfs (list[pd.DataFrame]): A list of DataFrames, where each DataFrame
                                   contains 'num_threads', 'value_size', 'speedup',
                                   and 'efficiency' columns for a specific workload.
                                   It's assumed 'speedup' and 'efficiency' have
                                   already been calculated for each df.
        workload_type_description (str): A general description for the plot title,
                                         e.g., "with Compression ON".
    """
    if not dfs:
        print("Error: Input list of DataFrames is empty. Cannot generate plot.")
        return

    # Combine all DataFrames into a single one for easier processing
    combined_df = pd.concat(dfs, ignore_index=True)

    # Ensure required columns exist
    required_cols = ['num_threads', 'value_size', 'speedup', 'efficiency']
    if not all(col in combined_df.columns for col in required_cols):
        print(f"Error: Combined DataFrame must contain '{required_cols}' columns for plotting.")
        return

    # Ensure speedup/efficiency are numeric
    combined_df['speedup'] = pd.to_numeric(combined_df['speedup'], errors='coerce')
    combined_df['efficiency'] = pd.to_numeric(combined_df['efficiency'], errors='coerce')
    combined_df.dropna(subset=['speedup', 'efficiency'], inplace=True)

    # Define fixed y-axis ranges for consistent comparison
    speedup_y_range = [0.0, max(5.0, combined_df['speedup'].max() * 1.1)]
    efficiency_y_range = [0.0, max(1.1, combined_df['efficiency'].max() * 1.1)]

    # Get unique thread counts for different lines
    unique_threads = sorted(combined_df['num_threads'].unique())
    # Get unique value sizes for x-axis ticks
    unique_value_sizes = sorted(combined_df['value_size'].unique())

    # --- Color and Marker Definition ---
    # Using Plotly Express's default qualitative color sequence for distinctness
    colors = px.colors.qualitative.Plotly
    # Define a list of distinct marker symbols
    markers = ['circle', 'square', 'diamond', 'cross', 'x', 'triangle-up', 'triangle-down', 'pentagon', 'hexagram']
    
    # Map thread count to a color and marker
    color_map = {thread: colors[i % len(colors)] for i, thread in enumerate(unique_threads)}
    marker_map = {thread: markers[i % len(markers)] for i, thread in enumerate(unique_threads)}
    # --- End Color and Marker Definition ---


    # Create subplots: one for Speedup, one for Efficiency
    fig = make_subplots(rows=1, cols=2,
                        subplot_titles=(f'Speedup vs. Value Size',
                                        f'Efficiency vs. Value Size'))

    # Plot Speedup for each number of threads
    for threads_count in unique_threads:
        df_subset = combined_df[combined_df['num_threads'] == threads_count]
        fig.add_trace(
            go.Scatter(
                x=df_subset['value_size'],
                y=df_subset['speedup'],
                mode='lines+markers',
                name=f'{threads_count} Threads',
                legendgroup=str(threads_count), # Group traces for consistent legend
                showlegend=True, # Show legend for each thread group
                marker=dict(symbol=marker_map[threads_count], size=8), # Apply distinct marker
                line=dict(width=2, color=color_map[threads_count]) # Apply distinct color
            ),
            row=1, col=1
        )
    # Add ideal speedup line
    fig.add_trace(
        go.Scatter(
            x=unique_value_sizes,
            y=[1] * len(unique_value_sizes),
            mode='lines',
            name='Ideal (1x Speedup)',
            line=dict(dash='dash', color='gray'), # Keep ideal lines distinct and subtle
            showlegend=True,
            legendgroup='ideal_speedup'
        ),
        row=1, col=1
    )


    # Plot Efficiency for each number of threads
    for threads_count in unique_threads:
        df_subset = combined_df[combined_df['num_threads'] == threads_count]
        fig.add_trace(
            go.Scatter(
                x=df_subset['value_size'],
                y=df_subset['efficiency'],
                mode='lines+markers',
                name=f'{threads_count} Threads',
                legendgroup=str(threads_count), # Group traces for consistent legend
                showlegend=False, # Hide legend for efficiency to avoid duplication
                marker=dict(symbol=marker_map[threads_count], size=8), # Apply distinct marker
                line=dict(width=2, color=color_map[threads_count]) # Apply distinct color
            ),
            row=1, col=2
        )
    # Add ideal efficiency lines
    fig.add_trace(
        go.Scatter(
            x=unique_value_sizes,
            y=[1] * len(unique_value_sizes),
            mode='lines',
            name='Ideal Efficiency (100%)',
            line=dict(dash='dash', color='gray'),
            showlegend=True,
            legendgroup='ideal_efficiency_1'
        ),
        row=1, col=2
    )
    fig.add_trace(
        go.Scatter(
            x=unique_value_sizes,
            y=[0.5] * len(unique_value_sizes),
            mode='lines',
            name='50% Efficiency',
            line=dict(dash='dot', color='orange'),
            showlegend=True,
            legendgroup='ideal_efficiency_05'
        ),
        row=1, col=2
    )


    # Update layout for a beautiful graph
    fig.update_layout(
        title_text=f'{workload_type_description}',
        height=600, width=1200,
        showlegend=True,
        hovermode="x unified",
        template="plotly_white",
        title_x=0.5,
    )

    # Update x-axes to be consistent and descriptive
    fig.update_xaxes(
        title_text='Value Size (Bytes)',
        type='log',
        tickmode='array',
        tickvals=unique_value_sizes,
        ticktext=[f'{s}B' for s in unique_value_sizes],
        row=1, col=1
    )
    fig.update_xaxes(
        title_text='Value Size (Bytes)',
        type='log',
        tickmode='array',
        tickvals=unique_value_sizes,
        ticktext=[f'{s}B' for s in unique_value_sizes],
        row=1, col=2
    )

    # Update y-axes for clarity and consistent ranges
    fig.update_yaxes(title_text='Speedup (X times faster)', range=speedup_y_range, row=1, col=1)
    fig.update_yaxes(title_text='Efficiency (Fraction of Ideal)', range=efficiency_y_range, row=1, col=2)

    fig.show()
    # fig.write_html(f"combined_performance_scaling_{workload_type_description.replace(' ', '_').replace('.', '')}.html")

In [7]:
import pandas as pd
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import plotly.express as px # For distinct colors and markers

def plot_performance_by_workload_on_threads(dfs: list[pd.DataFrame], workload_names: list[str]):
    """
    Generates an interactive Plotly graph showing speedup and efficiency
    vs. number of threads, with each line representing a different workload.

    Args:
        dfs (list[pd.DataFrame]): A list of DataFrames, where each DataFrame
                                   contains 'num_threads', 'value_size', 'speedup',
                                   and 'efficiency' columns for a specific workload.
                                   It's assumed 'speedup' and 'efficiency' have
                                   already been calculated for each df.
        workload_names (list[str]): A list of descriptive names for each workload,
                                     used for legend labels (e.g., ["50 Bytes", "500 Bytes"]).
                                     Length must match dfs.
    """
    if not dfs or not workload_names or len(dfs) != len(workload_names):
        print("Error: Input lists 'dfs' and 'workload_names' must not be empty and must have matching lengths.")
        return

    # Define fixed y-axis ranges for consistent comparison across all your plots
    # Adjust these ranges based on the MIN and MAX values you expect across ALL your datasets.
    speedup_y_range = [0.0, 5.0]  # Example: from 0x to 5x speedup
    efficiency_y_range = [0.0, 1.5] # Example: from 0% to 150% efficiency (to accommodate superlinear)

    # Get all unique thread counts across all DFs for x-axis ticks
    all_num_threads = sorted(pd.concat(df['num_threads'] for df in dfs).unique())

    # --- Color and Marker Definition ---
    colors = px.colors.qualitative.Plotly # A good default palette
    markers = ['circle', 'square', 'diamond', 'cross', 'x', 'triangle-up', 'triangle-down', 'pentagon', 'hexagram']
    # --- End Color and Marker Definition ---

    # Create subplots: one for Speedup, one for Efficiency
    fig = make_subplots(rows=1, cols=2,
                        subplot_titles=(f'Speedup vs. Number of Threads',
                                        f'Efficiency vs. Number of Threads'))

    # Plot Speedup for each workload
    for i, df in enumerate(dfs):
        workload_label = workload_names[i]
        color = colors[i % len(colors)]
        marker = markers[i % len(markers)]

        fig.add_trace(
            go.Scatter(
                x=df['num_threads'],
                y=df['speedup'],
                mode='lines+markers',
                name=f'Speedup: {workload_label}',
                marker=dict(symbol=marker, size=8),
                line=dict(width=2, color=color),
                legendgroup=workload_label, # Group traces for consistent legend
                showlegend=True
            ),
            row=1, col=1
        )

    # Add Ideal Speedup line (for reference, only once)
    fig.add_trace(
        go.Scatter(
            x=all_num_threads,
            y=[1] * len(all_num_threads),
            mode='lines',
            name='(1x Speedup)',
            line=dict(dash='dash', color='gray'),
            showlegend=True,
            legendgroup='ideal_speedup'
        ),
        row=1, col=1
    )


    # Plot Efficiency for each workload
    for i, df in enumerate(dfs):
        workload_label = workload_names[i]
        color = colors[i % len(colors)]
        marker = markers[i % len(markers)]

        fig.add_trace(
            go.Scatter(
                x=df['num_threads'],
                y=df['efficiency'],
                mode='lines+markers',
                name=f'Efficiency: {workload_label}',
                marker=dict(symbol=marker, size=8),
                line=dict(width=2, color=color),
                legendgroup=workload_label, # Group traces for consistent legend
                showlegend=False # Hide legend here to avoid duplication, as it's shown for Speedup
            ),
            row=1, col=2
        )

    # Add Ideal Efficiency lines (for reference, only once)
    fig.add_trace(
        go.Scatter(
            x=all_num_threads,
            y=[1] * len(all_num_threads),
            mode='lines',
            name='Ideal Efficiency (100%)',
            line=dict(dash='dash', color='gray'),
            showlegend=True,
            legendgroup='ideal_efficiency_1'
        ),
        row=1, col=2
    )
    fig.add_trace(
        go.Scatter(
            x=all_num_threads,
            y=[0.5] * len(all_num_threads),
            mode='lines',
            name='50% Efficiency',
            line=dict(dash='dot', color='orange'),
            showlegend=True,
            legendgroup='ideal_efficiency_05'
        ),
        row=1, col=2
    )


    # Update layout for a beautiful graph
    fig.update_layout(
        title_text=f'Performance Scaling Across Workloads',
        height=600, width=1200,
        showlegend=True,
        hovermode="x unified",
        template="plotly_white",
        title_x=0.5,
    )

    # Update x-axes for clarity
    fig.update_xaxes(
        title_text='Number of Threads',
        tickmode='array',
        tickvals=all_num_threads,
        row=1, col=1
    )
    fig.update_xaxes(
        title_text='Number of Threads',
        tickmode='array',
        tickvals=all_num_threads,
        row=1, col=2
    )

    # Update y-axes for clarity and consistent ranges
    fig.update_yaxes(title_text='Speedup (X times faster)', range=speedup_y_range, row=1, col=1)
    fig.update_yaxes(title_text='Efficiency (Fraction of Ideal)', range=efficiency_y_range, row=1, col=2)

    fig.show()
    # You can also save it as an HTML file for interactive viewing later:
    # fig.write_html(f"performance_scaling_across_workloads.html")

In [8]:
num_keys = 50.0
value_size = 50
compression_on = True
test_configs = [
    {"num_keys": 100.0, "value_size": 50},
    {"num_keys": 40.0, "value_size": 300},
    {"num_keys": 40.0, "value_size": 500},
    {"num_keys": 5.0, "value_size": 5000},
    {"num_keys": 1.0, "value_size": 20000},
    {"num_keys": 0.1, "value_size": 100000},

    ]

def load_df(test_config: dict, compression_on: bool):
    num_keys = test_config["num_keys"]
    value_size = test_config["value_size"]
    compression_str = "yes" if compression_on else "no"
    df = pd.read_csv(f"save_summary_{num_keys}keys_{value_size}B_comp-{compression_str}_csum-yes.csv")
    return df


dfs_compression_on = [load_df(config, compression_on=True) for config in test_configs]
dfs_compression_on = [calculate_speedup_and_efficiency(df) for df in dfs_compression_on]
dfs_compression_off = [load_df(config, compression_on=False) for config in test_configs]
dfs_compression_off = [calculate_speedup_and_efficiency(df) for df in dfs_compression_off]

print("\n" + "="*50) # Top border
print("                 COMPRESSION ON")
print("="*50 + "\n") # Bottom border

all_workload_names = ["Value Size 50B", "Value Size 500B", "Value Size 5,000B", "Value Size 20,000B", "Value Size 100,000B"]
plot_performance_by_workload_on_threads(dfs_compression_on, all_workload_names)
plot_combined_workload_performance(dfs_compression_on)

print("\n" + "="*50) # Top border
print("                 COMPRESSION OFF")
print("="*50 + "\n") # Bottom border

all_workload_names = ["Value Size 50B", "Value Size 500B", "Value Size 5,000B", "Value Size 20,000B", "Value Size 100,000B"]
plot_performance_by_workload_on_threads(dfs_compression_off, all_workload_names)
plot_combined_workload_performance(dfs_compression_off)
        


                 COMPRESSION ON

Error: Input lists 'dfs' and 'workload_names' must not be empty and must have matching lengths.



                 COMPRESSION OFF

Error: Input lists 'dfs' and 'workload_names' must not be empty and must have matching lengths.


In [9]:
dfs_compression_on[0]

Unnamed: 0,keys,value_size,num_threads,rdbcompression,rdbchecksum,valkey_data_throughput_mb_s,actual_throughput_mb_s,rdb_file_size_bytes,status,port,save_duration_seconds,cpu_utilization_percent,cpu_total_time_seconds,io_read_bytes,io_write_bytes,memory_rss_bytes,context_switches_voluntary,context_switches_involuntary,speedup,efficiency
0,100000000,50,1,yes,yes,104.433036,109.179994,6900000108,ok,7000,63.198392,99.923428,63.15,0,6900006912,11027730432,63,964,1.0,1.0
1,100000000,50,2,yes,yes,241.290732,252.258497,6900000108,ok,7000,27.352895,185.208917,50.66,0,6900002816,11028254720,11713,198,2.310483,1.155241
2,100000000,50,3,yes,yes,317.119136,331.533647,6900000108,ok,7000,20.812368,259.413063,53.99,0,6900006912,11029303296,10637,81,3.036579,1.012193
3,100000000,50,4,yes,yes,376.329598,393.435495,6900000108,ok,7000,17.537818,316.002823,55.42,0,6900002816,11030351872,9086,76,3.603549,0.900887
4,100000000,50,6,yes,yes,378.243895,395.436806,6900000108,ok,7000,17.449059,321.048833,56.02,0,6900006912,11030876160,19708,45,3.62188,0.603647
5,100000000,50,8,yes,yes,384.873801,402.368071,6900000108,ok,7000,17.148478,308.482183,52.9,0,6900002816,11032449024,14315,34,3.685364,0.460671
6,100000000,50,10,yes,yes,384.200004,401.663647,6900000108,ok,7000,17.178553,315.393276,54.18,0,6900006912,11033497600,10823,37,3.678913,0.367891


In [11]:
dfs_compression_off[0]

Unnamed: 0,keys,value_size,num_threads,rdbcompression,rdbchecksum,valkey_data_throughput_mb_s,actual_throughput_mb_s,rdb_file_size_bytes,status,port,save_duration_seconds,cpu_utilization_percent,cpu_total_time_seconds,io_read_bytes,io_write_bytes,memory_rss_bytes,context_switches_voluntary,context_switches_involuntary,speedup,efficiency
0,100000000,50,1,no,yes,135.340782,141.492638,6900000111,ok,7000,48.765789,99.865092,48.7,0,6900006912,11029413888,97,750,1.0,1.0
1,100000000,50,2,no,yes,344.473669,360.131569,6900000111,ok,7000,19.159665,180.431132,34.57,0,6900002816,11029938176,3875,199,2.545232,1.272616
2,100000000,50,3,no,yes,384.886507,402.381355,6900000111,ok,7000,17.147912,211.745895,36.31,0,6900006912,11029938176,11643,114,2.843832,0.947944
3,100000000,50,4,no,yes,385.135572,402.641741,6900000111,ok,7000,17.136823,217.076413,37.2,0,6900002816,11029938176,30509,50,2.845673,0.711418
4,100000000,50,6,no,yes,384.361608,401.832597,6900000111,ok,7000,17.17133,228.753393,39.28,0,6900006912,11029938176,15586,50,2.839954,0.473326
5,100000000,50,8,no,yes,385.523387,403.047184,6900000111,ok,7000,17.119584,219.689918,37.61,0,6900002816,11030462464,12226,39,2.848538,0.356067
6,100000000,50,10,no,yes,384.882523,402.37719,6900000111,ok,7000,17.14809,222.88197,38.22,0,6900006912,11030462464,9884,32,2.843803,0.28438


In [12]:
dfs_compression_on[1]

Unnamed: 0,keys,value_size,num_threads,rdbcompression,rdbchecksum,valkey_data_throughput_mb_s,actual_throughput_mb_s,rdb_file_size_bytes,status,port,save_duration_seconds,cpu_utilization_percent,cpu_total_time_seconds,io_read_bytes,io_write_bytes,memory_rss_bytes,context_switches_voluntary,context_switches_involuntary,speedup,efficiency
0,50000000,500,1,yes,yes,140.222482,141.306718,25999492238,ok,7000,183.99332,99.878626,183.77,0,25999499264,29149958144,175,2790,1.0,1.0
1,50000000,500,2,yes,yes,359.440826,362.220115,25999492238,ok,7000,71.778157,181.406163,130.21,0,25999499264,29151006720,41475,552,2.563361,1.28168
2,50000000,500,3,yes,yes,399.299175,402.386659,25999492238,ok,7000,64.613206,211.133308,136.42,0,25999499264,29151531008,60360,283,2.847612,0.949204
3,50000000,500,4,yes,yes,399.115913,402.20198,25999492238,ok,7000,64.642875,228.254081,147.55,0,25999499264,29152579584,112531,189,2.846305,0.711576
4,50000000,500,6,yes,yes,398.837234,401.921146,25999492238,ok,7000,64.688043,232.624135,150.48,0,25999499264,29153628160,70215,172,2.844317,0.474053
5,50000000,500,8,yes,yes,399.273474,402.360759,25999492238,ok,7000,64.617366,236.112999,152.57,0,25999499264,29154676736,51875,120,2.847428,0.355929
6,50000000,500,10,yes,yes,399.090225,402.176094,25999492238,ok,7000,64.647036,240.428658,155.43,0,25999499264,29156773888,40050,108,2.846122,0.284612


In [13]:
dfs_compression_off[1]

Unnamed: 0,keys,value_size,num_threads,rdbcompression,rdbchecksum,valkey_data_throughput_mb_s,actual_throughput_mb_s,rdb_file_size_bytes,status,port,save_duration_seconds,cpu_utilization_percent,cpu_total_time_seconds,io_read_bytes,io_write_bytes,memory_rss_bytes,context_switches_voluntary,context_switches_involuntary,speedup,efficiency
0,50000000,500,1,no,yes,215.079878,216.747165,26000000111,ok,7000,119.955433,99.920443,119.86,0,26000007168,29150056448,114,1794,1.0,1.0
1,50000000,500,2,no,yes,399.623265,402.721122,26000000111,ok,7000,64.560806,106.209951,68.57,0,26000007168,29150056448,235476,274,1.858023,0.929011
2,50000000,500,3,no,yes,397.302621,400.382488,26000000111,ok,7000,64.937905,108.149469,70.23,0,26000007168,29150056448,152481,172,1.847233,0.615744
3,50000000,500,4,no,yes,399.266999,402.362094,26000000111,ok,7000,64.618413,109.380587,70.68,0,26000007168,29150056448,113180,121,1.856366,0.464092
4,50000000,500,6,no,yes,399.356483,402.452271,26000000111,ok,7000,64.603934,111.959126,72.33,0,26000007168,29150056448,73149,75,1.856782,0.309464
5,50000000,500,8,no,yes,399.442771,402.539228,26000000111,ok,7000,64.589979,113.779261,73.49,0,26000007168,29150056448,54206,69,1.857183,0.232148
6,50000000,500,10,no,yes,399.365703,402.461563,26000000111,ok,7000,64.602443,116.032764,74.96,0,26000007168,29150056448,42525,81,1.856825,0.185683


In [14]:
dfs_compression_on[2]

Unnamed: 0,keys,value_size,num_threads,rdbcompression,rdbchecksum,valkey_data_throughput_mb_s,actual_throughput_mb_s,rdb_file_size_bytes,status,port,save_duration_seconds,cpu_utilization_percent,cpu_total_time_seconds,io_read_bytes,io_write_bytes,memory_rss_bytes,context_switches_voluntary,context_switches_involuntary,speedup,efficiency
0,5000000,5000,1,yes,yes,126.371144,113.676679,22560618003,ok,7000,198.463029,99.887622,198.24,0,22560624640,26104631296,191,2855,1.0,1.0
1,5000000,5000,2,yes,yes,294.704054,265.099903,22560618003,ok,7000,85.102324,187.644698,159.69,0,22560624640,26105155584,23957,665,2.332052,1.166026
2,5000000,5000,3,yes,yes,409.829972,368.660983,22560618003,ok,7000,61.19611,263.399096,161.19,0,22560624640,26105679872,23000,339,3.243066,1.081022
3,5000000,5000,4,yes,yes,447.318964,402.384062,22560618003,ok,7000,56.067375,290.418449,162.83,0,22560624640,26106728448,41772,123,3.539724,0.884931
4,5000000,5000,6,yes,yes,445.589361,400.828204,22560618003,ok,7000,56.285006,308.021641,173.37,0,22560624640,26107252736,63095,129,3.526037,0.587673
5,5000000,5000,8,yes,yes,446.834789,401.948524,22560618003,ok,7000,56.128127,312.428025,175.36,0,22560624640,26107777024,46561,122,3.535893,0.441987
6,5000000,5000,10,yes,yes,447.137901,402.221188,22560618003,ok,7000,56.090079,316.383939,177.46,0,22560624640,26109349888,36320,121,3.538291,0.353829


In [15]:
dfs_compression_off[2]

Unnamed: 0,keys,value_size,num_threads,rdbcompression,rdbchecksum,valkey_data_throughput_mb_s,actual_throughput_mb_s,rdb_file_size_bytes,status,port,save_duration_seconds,cpu_utilization_percent,cpu_total_time_seconds,io_read_bytes,io_write_bytes,memory_rss_bytes,context_switches_voluntary,context_switches_involuntary,speedup,efficiency
0,5000000,5000,1,no,yes,294.121116,294.355664,25100000111,ok,7000,85.270994,99.705651,85.02,0,25100005376,26106761216,123,1222,1.0,1.0
1,5000000,5000,2,no,yes,398.288962,398.606579,25100000111,ok,7000,62.969357,84.930198,53.48,0,25100005376,26106761216,212805,226,1.354166,0.677083
2,5000000,5000,3,no,yes,401.783348,402.103752,25100000111,ok,7000,62.421701,86.332156,53.89,0,25100005376,26106761216,144728,169,1.366047,0.455349
3,5000000,5000,4,no,yes,402.048479,402.369094,25100000111,ok,7000,62.380537,111.364864,69.47,4096,25100005376,26106761216,117452,161,1.366949,0.341737
4,5000000,5000,6,no,yes,401.855265,402.175726,25100000111,ok,7000,62.41053,109.084157,68.08,0,25100005376,26106761216,73991,106,1.366292,0.227715
5,5000000,5000,8,no,yes,402.1331,402.453782,25100000111,ok,7000,62.36741,124.263617,77.5,0,25100005376,26106761216,57416,95,1.367236,0.170905
6,5000000,5000,10,no,yes,402.113199,402.433866,25100000111,ok,7000,62.370497,123.664239,77.13,0,25100005376,26107285504,38621,129,1.367169,0.136717


In [16]:
dfs_compression_on[3]

Unnamed: 0,keys,value_size,num_threads,rdbcompression,rdbchecksum,valkey_data_throughput_mb_s,actual_throughput_mb_s,rdb_file_size_bytes,status,port,save_duration_seconds,cpu_utilization_percent,cpu_total_time_seconds,io_read_bytes,io_write_bytes,memory_rss_bytes,context_switches_voluntary,context_switches_involuntary,speedup,efficiency
0,1000000,20000,1,yes,yes,175.888556,140.100202,15943309302,ok,7000,113.799331,99.939076,113.73,0,15943315456,20712423424,143,1739,1.0,1.0
1,1000000,20000,2,yes,yes,360.731154,287.332552,15943309302,ok,7000,55.487306,186.475082,103.47,0,15943315456,20712947712,15099,420,2.050907,1.025454
2,1000000,20000,3,yes,yes,504.042084,401.483755,15943309302,ok,7000,39.71097,260.557726,103.47,0,15943315456,20713472000,16857,243,2.86569,0.95523
3,1000000,20000,4,yes,yes,505.378166,402.547983,15943309302,ok,7000,39.605985,264.783215,104.87,0,15943315456,20714520576,30633,128,2.873286,0.718322
4,1000000,20000,6,yes,yes,505.143988,402.361453,15943309302,ok,7000,39.624346,277.379975,109.91,0,15943315456,20715008000,40867,113,2.871955,0.478659
5,1000000,20000,8,yes,yes,500.34976,398.542715,15943309302,ok,7000,40.004016,284.696414,113.89,0,15943315456,20717629440,29524,87,2.844698,0.355587
6,1000000,20000,10,yes,yes,505.024136,402.265987,15943309302,ok,7000,39.633749,290.938914,115.31,0,15943315456,20720775168,23451,61,2.871273,0.287127


In [17]:
dfs_compression_off[3]

Unnamed: 0,keys,value_size,num_threads,rdbcompression,rdbchecksum,valkey_data_throughput_mb_s,actual_throughput_mb_s,rdb_file_size_bytes,status,port,save_duration_seconds,cpu_utilization_percent,cpu_total_time_seconds,io_read_bytes,io_write_bytes,memory_rss_bytes,context_switches_voluntary,context_switches_involuntary,speedup,efficiency
0,1000000,20000,1,no,yes,402.519014,402.659785,20023000111,ok,7000,49.726843,90.152515,44.83,0,20023005184,20714500096,45632,621,1.0,1.0
1,1000000,20000,2,no,yes,402.294981,402.435674,20023000111,ok,7000,49.754536,87.127735,43.35,0,20023005184,20715024384,153742,270,0.999443,0.499722
2,1000000,20000,3,no,yes,402.307891,402.448588,20023000111,ok,7000,49.752939,101.320647,50.41,0,20023005184,20715024384,112693,179,0.999475,0.333158
3,1000000,20000,4,no,yes,402.362534,402.503251,20023000111,ok,7000,49.746182,96.248592,47.88,0,20023005184,20715024384,81695,136,0.999611,0.249903
4,1000000,20000,6,no,yes,402.160711,402.301357,20023000111,ok,7000,49.771147,131.260788,65.33,0,20023005184,20715024384,56607,81,0.99911,0.166518
5,1000000,20000,8,no,yes,402.223007,402.363675,20023000111,ok,7000,49.763439,119.646072,59.54,0,20023005184,20715024384,40842,59,0.999265,0.124908
6,1000000,20000,10,no,yes,402.232905,402.373576,20023000111,ok,7000,49.762214,143.663221,71.49,0,20023005184,20715024384,36343,43,0.999289,0.099929


In [19]:
dfs_compression_on[4]

Unnamed: 0,keys,value_size,num_threads,rdbcompression,rdbchecksum,valkey_data_throughput_mb_s,actual_throughput_mb_s,rdb_file_size_bytes,status,port,save_duration_seconds,cpu_utilization_percent,cpu_total_time_seconds,io_read_bytes,io_write_bytes,memory_rss_bytes,context_switches_voluntary,context_switches_involuntary,speedup,efficiency
0,100000,100000,1,yes,yes,216.152058,165.391208,7652838093,ok,7000,46.27113,99.932723,46.24,0,7652843520,10285142016,58,727,1.0,1.0
1,100000,100000,2,yes,yes,393.069312,300.761459,7652838093,ok,7000,25.444876,186.285048,47.4,0,7652839424,10285142016,5664,239,1.818485,0.909243
2,100000,100000,3,yes,yes,523.771603,400.769804,7652838093,ok,7000,19.095346,252.365157,48.19,0,7652843520,10285666304,9320,137,2.423163,0.807721
3,100000,100000,4,yes,yes,525.350046,401.977568,7652838093,ok,7000,19.037973,257.012655,48.93,0,7652839424,10287239168,16951,68,2.430465,0.607616
4,100000,100000,6,yes,yes,524.459609,401.29624,7652838093,ok,7000,19.070296,271.679055,51.81,0,7652843520,10288275456,20275,56,2.426346,0.404391
5,100000,100000,8,yes,yes,526.11071,402.559599,7652838093,ok,7000,19.010447,277.4264,52.74,0,7652839424,10290372608,14841,33,2.433984,0.304248
6,100000,100000,10,yes,yes,525.269895,401.916239,7652838093,ok,7000,19.040878,280.606808,53.43,0,7652843520,10293485568,11758,35,2.430094,0.243009


In [20]:
dfs_compression_off[4]

Unnamed: 0,keys,value_size,num_threads,rdbcompression,rdbchecksum,valkey_data_throughput_mb_s,actual_throughput_mb_s,rdb_file_size_bytes,status,port,save_duration_seconds,cpu_utilization_percent,cpu_total_time_seconds,io_read_bytes,io_write_bytes,memory_rss_bytes,context_switches_voluntary,context_switches_involuntary,speedup,efficiency
0,100000,100000,1,no,yes,402.847745,402.875944,10002300111,ok,7000,24.827246,63.720318,15.82,0,10002305024,10282569728,33259,208,1.0,1.0
1,100000,100000,2,no,yes,402.159452,402.187603,10002300111,ok,7000,24.869738,74.709273,18.58,0,10002300928,10283094016,69687,137,0.998291,0.499146
2,100000,100000,3,no,yes,402.456196,402.484368,10002300111,ok,7000,24.8514,91.061267,22.63,0,10002305024,10284142592,46666,100,0.999028,0.333009
3,100000,100000,4,no,yes,402.194255,402.222409,10002300111,ok,7000,24.867585,103.146323,25.65,0,10002300928,10284142592,30517,61,0.998378,0.249594
4,100000,100000,6,no,yes,402.449984,402.478156,10002300111,ok,7000,24.851784,118.462321,29.44,0,10002305024,10284142592,20008,30,0.999013,0.166502
5,100000,100000,8,no,yes,402.439915,402.468086,10002300111,ok,7000,24.852406,151.212726,37.58,0,10002300928,10285191168,20164,29,0.998988,0.124873
6,100000,100000,10,no,yes,401.568714,401.596823,10002300111,ok,7000,24.906323,245.881339,61.24,0,10002305024,10285191168,16710,23,0.996825,0.099683
