In [1]:
import pandas as pd

In [2]:
def calculate_speedup_and_efficiency(df: pd.DataFrame, num_threads_column: str = 'num_threads', duration_column: str = 'save_duration_seconds') -> pd.DataFrame:
    """
    Calculates speedup and efficiency for a DataFrame of benchmark results.

    Assumes the DataFrame contains results for different numbers of threads,
    including a run with 1 thread, which serves as the baseline. When several
    rows have a thread count of 1, the first one is used as the baseline.

    The input DataFrame is never modified: the metrics are written to a copy,
    so re-running a notebook cell cannot silently alter the raw benchmark frame.

    Args:
        df (pd.DataFrame): The input DataFrame containing benchmark results.
                          Must have columns for thread count and duration.
        num_threads_column (str): The name of the column containing the number of threads.
        duration_column (str): The name of the column containing the duration (runtime) in seconds.

    Returns:
        pd.DataFrame: A copy of ``df`` with 'speedup' and 'efficiency' columns added,
                      or an empty DataFrame if the input is empty, no single-thread
                      baseline is found, or the baseline runtime is zero.
    """
    if df.empty:
        print("Warning: Input DataFrame is empty.")
        return pd.DataFrame()

    # Get the baseline runtime (1 thread)
    baseline_rows = df[df[num_threads_column] == 1]
    if baseline_rows.empty:
        print(f"Error: Baseline (1 {num_threads_column}) not found in the DataFrame. Cannot calculate speedup.")
        return pd.DataFrame()

    baseline_runtime = baseline_rows[duration_column].iloc[0]

    if baseline_runtime == 0:
        print("Warning: Baseline runtime is zero. Speedup and efficiency calculations will result in division by zero.")
        return pd.DataFrame()

    # Work on a copy so the caller's frame keeps its original columns.
    result = df.copy()

    # Speedup = Runtime (1 thread) / Runtime (P threads)
    result['speedup'] = baseline_runtime / result[duration_column]

    # Efficiency = Speedup / Number of threads
    result['efficiency'] = result['speedup'] / result[num_threads_column]

    return result

In [3]:
def extract_efficiency_and_speedup_at_threads(dfs: list[pd.DataFrame], threads = 2):
    """
    Collects the speedup and efficiency measured at one thread count from each frame.

    For every DataFrame in ``dfs`` the first row whose 'num_threads' equals
    ``threads`` contributes one record. Frames that are empty, lack one of the
    required columns, or have no row at ``threads`` are skipped with a warning
    instead of raising, because calculate_speedup_and_efficiency returns an
    empty DataFrame when it cannot compute the metrics.

    Args:
        dfs (list[pd.DataFrame]): Frames containing 'num_threads', 'value_size',
                                  'speedup' and 'efficiency' columns.
        threads: The thread count to extract.

    Returns:
        pd.DataFrame: Columns 'value_size', 'efficiency_at_{threads}_threads' and
                      'speedup_at_{threads}_threads', sorted by 'value_size' with a
                      fresh index. Has zero rows when nothing could be extracted.
    """
    efficiency_col = f'efficiency_at_{threads}_threads'
    speedup_col = f'speedup_at_{threads}_threads'
    required_cols = ('num_threads', 'value_size', 'speedup', 'efficiency')

    extracted_data = []
    for position, df in enumerate(dfs):
        if df.empty or not all(col in df.columns for col in required_cols):
            print(f"Warning: Skipping DataFrame {position}: empty or missing one of {list(required_cols)}.")
            continue

        df_at_threads = df[df['num_threads'] == threads]
        if df_at_threads.empty:
            print(f"Warning: Skipping DataFrame {position}: no row with num_threads == {threads}.")
            continue

        # Read each column separately so the values keep their own dtypes
        # (taking a whole row would upcast integer value sizes to float).
        extracted_data.append({
            'value_size': df_at_threads['value_size'].iloc[0],
            efficiency_col: df_at_threads['efficiency'].iloc[0],
            speedup_col: df_at_threads['speedup'].iloc[0],
        })

    # Naming the columns explicitly keeps sort_values valid even with no records.
    result_df = pd.DataFrame(extracted_data, columns=['value_size', efficiency_col, speedup_col])
    return result_df.sort_values(by='value_size').reset_index(drop=True)

In [4]:
import pandas as pd
import plotly.graph_objects as go
from plotly.subplots import make_subplots

def plot_workload_performance(df_metrics: pd.DataFrame, threads: int = 2):
    """
    Generates an interactive Plotly graph showing efficiency and speedup
    vs. value_size for a specific number of threads.

    The figure has two panels over a log-scaled value_size axis: speedup with
    a 1x baseline reference on the left, efficiency with 100% and 50%
    reference lines on the right. The figure is shown with ``fig.show()``;
    nothing is returned. If the input is empty or lacks a required column, an
    error is printed and no figure is produced.

    Args:
        df_metrics (pd.DataFrame): DataFrame containing 'value_size',
                                   'efficiency_at_{threads}_threads',
                                   and 'speedup_at_{threads}_threads' columns.
                                   This DataFrame is typically generated by
                                   extract_efficiency_and_speedup_at_threads.
        threads (int): The number of threads for which the data is being plotted.
    """
    if df_metrics.empty:
        print("Error: Input DataFrame is empty. Cannot generate plot.")
        return

    # Check for required columns
    required_efficiency_col = f'efficiency_at_{threads}_threads'
    required_speedup_col = f'speedup_at_{threads}_threads'
    required_cols = ['value_size', required_efficiency_col, required_speedup_col]

    if not all(col in df_metrics.columns for col in required_cols):
        print(f"Error: DataFrame must contain '{required_cols}' columns for plotting.")
        return

    value_sizes = df_metrics['value_size'].tolist()

    # Keep the fixed ranges ([0, 5] and [0, 1.5]) as a floor so plots stay
    # comparable, but grow them when the data would otherwise be clipped.
    # The constant goes first in max() so a NaN peak falls back to the floor.
    speedup_peak = pd.to_numeric(df_metrics[required_speedup_col], errors='coerce').max()
    efficiency_peak = pd.to_numeric(df_metrics[required_efficiency_col], errors='coerce').max()
    speedup_y_range = [0.0, max(5.0, speedup_peak * 1.1)]
    efficiency_y_range = [0.0, max(1.5, efficiency_peak * 1.1)]

    def _reference_line(level, label, dash, color):
        """Build a flat reference trace at ``level`` spanning every value size."""
        return go.Scatter(
            x=value_sizes,
            y=[level] * len(value_sizes),
            mode='lines',
            name=label,
            line=dict(dash=dash, color=color),
            showlegend=True
        )

    # Create subplots: one for Speedup, one for Efficiency
    fig = make_subplots(rows=1, cols=2,
                        subplot_titles=(f'Speedup at {threads} Threads vs. Value Size',
                                        f'Efficiency at {threads} Threads vs. Value Size'))

    # Left panel: measured speedup plus the 1x reference.
    fig.add_trace(
        go.Scatter(
            x=value_sizes,
            y=df_metrics[required_speedup_col],
            mode='lines+markers',
            name=f'Speedup at {threads} Threads',
            marker=dict(symbol='circle', size=8),
            line=dict(width=2)
        ),
        row=1, col=1
    )
    # The y=1 line marks "no speedup" (the single-thread baseline), not the
    # ideal, so it is labelled as the baseline.
    fig.add_trace(_reference_line(1, 'Baseline (1x Speedup)', 'dash', 'gray'), row=1, col=1)

    # Right panel: measured efficiency plus the 100% and 50% references.
    fig.add_trace(
        go.Scatter(
            x=value_sizes,
            y=df_metrics[required_efficiency_col],
            mode='lines+markers',
            name=f'Efficiency at {threads} Threads',
            marker=dict(symbol='diamond', size=8),
            line=dict(width=2)
        ),
        row=1, col=2
    )
    fig.add_trace(_reference_line(1, 'Ideal Efficiency (100%)', 'dash', 'gray'), row=1, col=2)
    fig.add_trace(_reference_line(0.5, '50% Efficiency', 'dot', 'orange'), row=1, col=2)

    # Update layout for a beautiful graph
    fig.update_layout(
        title_text=f'Performance Scaling with Workload (at {threads} Threads)',
        height=600, width=1200,
        showlegend=True,
        hovermode="x unified", # Shows hover info for all traces at x-position
        template="plotly_white", # Clean white background
        title_x=0.5, # Center the main title
    )

    # Both panels share the same x-axis configuration.
    for panel_col in (1, 2):
        fig.update_xaxes(
            title_text='Value Size (Bytes)',
            type='log', # Use log scale if value_size spans orders of magnitude
            tickmode='array',
            tickvals=value_sizes, # Set specific tick values
            ticktext=[f'{s}B' for s in value_sizes], # Add 'B' for bytes
            row=1, col=panel_col
        )

    # Update y-axes for clarity and consistent ranges
    fig.update_yaxes(title_text='Speedup (X times faster)', range=speedup_y_range, row=1, col=1)
    fig.update_yaxes(title_text='Efficiency (Fraction of Ideal)', range=efficiency_y_range, row=1, col=2)

    fig.show()
    # You can also save it as an HTML file for interactive viewing later:
    # fig.write_html(f"performance_scaling_at_{threads}_threads.html")

In [5]:
import pandas as pd
import plotly.graph_objects as go
from plotly.subplots import make_subplots

def plot_single_df_performance(df: pd.DataFrame, workload_description: str = "Performance Metrics"):
    """
    Generates an interactive Plotly graph showing speedup and efficiency
    vs. number of threads for a single DataFrame, with consistent y-axis ranges.

    The left panel shows speedup with a 1x baseline reference; the right panel
    shows efficiency with 100% and 50% reference lines. The figure is shown
    with ``fig.show()``; nothing is returned. If the input is empty or lacks
    a required column, an error is printed and no figure is produced.

    Args:
        df (pd.DataFrame): The input DataFrame containing 'num_threads', 'speedup',
                           and 'efficiency' columns.
        workload_description (str): A descriptive string for the plot title,
                                    e.g., "Value Size 50 Bytes, No Compression".
    """
    required_cols = ('num_threads', 'speedup', 'efficiency')
    if df.empty or not all(col in df.columns for col in required_cols):
        print("Error: DataFrame must contain 'num_threads', 'speedup', and 'efficiency' columns for plotting.")
        return

    # Keep the fixed ranges ([0, 5] and [0, 1.5]) as a floor so plots stay
    # comparable, but grow them when the data would otherwise be clipped.
    # The constant goes first in max() so a NaN peak falls back to the floor.
    speedup_peak = pd.to_numeric(df['speedup'], errors='coerce').max()
    efficiency_peak = pd.to_numeric(df['efficiency'], errors='coerce').max()
    speedup_y_range = [0.0, max(5.0, speedup_peak * 1.1)]
    efficiency_y_range = [0.0, max(1.5, efficiency_peak * 1.1)]

    thread_counts = df['num_threads']

    def _reference_line(level, label, dash, color):
        """Build a flat reference trace at ``level`` spanning every thread count."""
        return go.Scatter(
            x=thread_counts,
            y=[level] * len(df),
            mode='lines',
            name=label,
            line=dict(dash=dash, color=color),
            showlegend=True
        )

    # Create subplots: one for Speedup, one for Efficiency
    fig = make_subplots(rows=1, cols=2,
                        subplot_titles=('Speedup', 'Efficiency'))

    # Left panel: measured speedup plus the 1x reference.
    fig.add_trace(
        go.Scatter(
            x=thread_counts,
            y=df['speedup'],
            mode='lines+markers',
            name='Speedup',
            marker=dict(symbol='circle', size=8),
            line=dict(width=2, color='royalblue')
        ),
        row=1, col=1
    )
    # A flat y=1 line is the single-thread baseline; ideal speedup would grow
    # with the thread count, so the trace is labelled as the baseline.
    fig.add_trace(_reference_line(1, 'Baseline (1x Speedup)', 'dash', 'gray'), row=1, col=1)

    # Right panel: measured efficiency plus the 100% and 50% references.
    fig.add_trace(
        go.Scatter(
            x=thread_counts,
            y=df['efficiency'],
            mode='lines+markers',
            name='Efficiency',
            marker=dict(symbol='diamond', size=8),
            line=dict(width=2, color='firebrick')
        ),
        row=1, col=2
    )
    fig.add_trace(_reference_line(1, 'Ideal Efficiency (100%)', 'dash', 'gray'), row=1, col=2)
    fig.add_trace(_reference_line(0.5, '50% Efficiency', 'dot', 'orange'), row=1, col=2)

    # Update layout for a beautiful graph
    fig.update_layout(
        title_text=f'{workload_description} vs. Number of Threads',
        height=600, width=1200,
        showlegend=True,
        hovermode="x unified", # Shows hover info for all traces at x-position
        template="plotly_white", # Clean white background
        title_x=0.5, # Center the main title
    )

    # Both panels share the same x-axis configuration.
    for panel_col in (1, 2):
        fig.update_xaxes(
            title_text='Number of Threads',
            tickmode='array',
            tickvals=thread_counts.unique(), # Ensure ticks match thread counts
            row=1, col=panel_col
        )

    # Update y-axes for clarity and consistent ranges
    fig.update_yaxes(title_text='Speedup (X times faster)', range=speedup_y_range, row=1, col=1)
    fig.update_yaxes(title_text='Efficiency (Fraction of Ideal)', range=efficiency_y_range, row=1, col=2)

    fig.show()
    # You can also save it as an HTML file for interactive viewing later:
    # fig.write_html(f"performance_metrics_{workload_description.replace(' ', '_').replace('.', '')}.html")

In [6]:
import pandas as pd
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import plotly.express as px # Import plotly.express for its color sequences

def plot_combined_workload_performance(dfs: list[pd.DataFrame], workload_type_description: str = "Overall Performance"):
    """
    Generates an interactive Plotly graph showing speedup and efficiency
    vs. value_size for multiple thread counts on the same chart, with distinct colors.

    All frames are concatenated, rows whose speedup or efficiency is not
    numeric are dropped, and one line per thread count is drawn in each panel.
    Points on each line are ordered by value_size regardless of the order of
    ``dfs``. The figure is shown with ``fig.show()``; nothing is returned. If
    there is nothing to plot, an error is printed and no figure is produced.

    Args:
        dfs (list[pd.DataFrame]): A list of DataFrames, where each DataFrame
                                   contains 'num_threads', 'value_size', 'speedup',
                                   and 'efficiency' columns for a specific workload.
                                   It's assumed 'speedup' and 'efficiency' have
                                   already been calculated for each df.
        workload_type_description (str): A general description for the plot title,
                                         e.g., "with Compression ON".
    """
    if not dfs:
        print("Error: Input list of DataFrames is empty. Cannot generate plot.")
        return

    # Combine all DataFrames into a single one for easier processing
    combined_df = pd.concat(dfs, ignore_index=True)

    # Ensure required columns exist
    required_cols = ['num_threads', 'value_size', 'speedup', 'efficiency']
    if not all(col in combined_df.columns for col in required_cols):
        print(f"Error: Combined DataFrame must contain '{required_cols}' columns for plotting.")
        return

    # Ensure speedup/efficiency are numeric and drop rows where they are not
    combined_df['speedup'] = pd.to_numeric(combined_df['speedup'], errors='coerce')
    combined_df['efficiency'] = pd.to_numeric(combined_df['efficiency'], errors='coerce')
    combined_df = combined_df.dropna(subset=['speedup', 'efficiency'])

    if combined_df.empty:
        print("Error: No numeric 'speedup'/'efficiency' values found. Cannot generate plot.")
        return

    # Order rows so each per-thread line runs left to right along the x-axis
    # instead of following the order in which the frames were passed in.
    combined_df = combined_df.sort_values(['num_threads', 'value_size'])

    # Y-axis ranges: a fixed floor for comparability, widened to fit the data
    speedup_y_range = [0.0, max(5.0, combined_df['speedup'].max() * 1.1)]
    efficiency_y_range = [0.0, max(1.1, combined_df['efficiency'].max() * 1.1)]

    # Get unique thread counts for different lines
    unique_threads = sorted(combined_df['num_threads'].unique())
    # Get unique value sizes for x-axis ticks
    unique_value_sizes = sorted(combined_df['value_size'].unique())

    # --- Color and Marker Definition ---
    # Using Plotly Express's default qualitative color sequence for distinctness
    colors = px.colors.qualitative.Plotly
    # Define a list of distinct marker symbols
    markers = ['circle', 'square', 'diamond', 'cross', 'x', 'triangle-up', 'triangle-down', 'pentagon', 'hexagram']

    # Map thread count to a color and marker (both cycle if there are more
    # thread counts than palette entries)
    color_map = {thread: colors[i % len(colors)] for i, thread in enumerate(unique_threads)}
    marker_map = {thread: markers[i % len(markers)] for i, thread in enumerate(unique_threads)}
    # --- End Color and Marker Definition ---

    def _thread_trace(threads_count, metric, show_in_legend):
        """Build the line for one thread count and one metric column."""
        df_subset = combined_df[combined_df['num_threads'] == threads_count]
        return go.Scatter(
            x=df_subset['value_size'],
            y=df_subset[metric],
            mode='lines+markers',
            name=f'{threads_count} Threads',
            legendgroup=str(threads_count), # Group traces for consistent legend
            showlegend=show_in_legend,
            marker=dict(symbol=marker_map[threads_count], size=8), # Apply distinct marker
            line=dict(width=2, color=color_map[threads_count]) # Apply distinct color
        )

    def _reference_line(level, label, dash, color, group):
        """Build a flat reference trace at ``level`` spanning every value size."""
        return go.Scatter(
            x=unique_value_sizes,
            y=[level] * len(unique_value_sizes),
            mode='lines',
            name=label,
            line=dict(dash=dash, color=color),
            showlegend=True,
            legendgroup=group
        )

    # Create subplots: one for Speedup, one for Efficiency
    fig = make_subplots(rows=1, cols=2,
                        subplot_titles=('Speedup vs. Value Size',
                                        'Efficiency vs. Value Size'))

    # Plot Speedup for each number of threads
    for threads_count in unique_threads:
        fig.add_trace(_thread_trace(threads_count, 'speedup', True), row=1, col=1)
    # The y=1 line marks "no speedup" (the single-thread baseline), not the
    # ideal, so it is labelled as the baseline.
    fig.add_trace(
        _reference_line(1, 'Baseline (1x Speedup)', 'dash', 'gray', 'ideal_speedup'),
        row=1, col=1
    )

    # Plot Efficiency for each number of threads; legend entries are hidden
    # here because the speedup traces already provide them.
    for threads_count in unique_threads:
        fig.add_trace(_thread_trace(threads_count, 'efficiency', False), row=1, col=2)
    # Add ideal efficiency lines
    fig.add_trace(
        _reference_line(1, 'Ideal Efficiency (100%)', 'dash', 'gray', 'ideal_efficiency_1'),
        row=1, col=2
    )
    fig.add_trace(
        _reference_line(0.5, '50% Efficiency', 'dot', 'orange', 'ideal_efficiency_05'),
        row=1, col=2
    )

    # Update layout for a beautiful graph
    fig.update_layout(
        title_text=f'{workload_type_description}',
        height=600, width=1200,
        showlegend=True,
        hovermode="x unified",
        template="plotly_white",
        title_x=0.5,
    )

    # Both panels share the same x-axis configuration.
    for panel_col in (1, 2):
        fig.update_xaxes(
            title_text='Value Size (Bytes)',
            type='log',
            tickmode='array',
            tickvals=unique_value_sizes,
            ticktext=[f'{s}B' for s in unique_value_sizes],
            row=1, col=panel_col
        )

    # Update y-axes for clarity and consistent ranges
    fig.update_yaxes(title_text='Speedup (X times faster)', range=speedup_y_range, row=1, col=1)
    fig.update_yaxes(title_text='Efficiency (Fraction of Ideal)', range=efficiency_y_range, row=1, col=2)

    fig.show()
    # fig.write_html(f"combined_performance_scaling_{workload_type_description.replace(' ', '_').replace('.', '')}.html")

In [7]:
import pandas as pd
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import plotly.express as px # For distinct colors and markers

def plot_performance_by_workload_on_threads(dfs: list[pd.DataFrame], workload_names: list[str]):
    """
    Generates an interactive Plotly graph showing speedup and efficiency
    vs. number of threads, with each line representing a different workload.

    Frames that are empty or lack a required column are skipped with a warning
    rather than raising, because calculate_speedup_and_efficiency returns an
    empty DataFrame when it cannot compute the metrics. Colors and markers
    follow each workload's position in ``dfs``, so skipping a frame does not
    change the styling of the others. The figure is shown with ``fig.show()``;
    nothing is returned.

    Args:
        dfs (list[pd.DataFrame]): A list of DataFrames, where each DataFrame
                                   contains 'num_threads', 'speedup',
                                   and 'efficiency' columns for a specific workload.
                                   It's assumed 'speedup' and 'efficiency' have
                                   already been calculated for each df.
        workload_names (list[str]): A list of descriptive names for each workload,
                                     used for legend labels (e.g., ["50 Bytes", "500 Bytes"]).
                                     Length must match dfs.
    """
    if not dfs or not workload_names or len(dfs) != len(workload_names):
        print("Error: Input lists 'dfs' and 'workload_names' must not be empty and must have matching lengths.")
        return

    # Keep only the workloads that can be plotted, remembering each one's
    # original position so its color/marker assignment is unaffected by skips.
    required_cols = ('num_threads', 'speedup', 'efficiency')
    plottable = []
    for position, (df, workload_label) in enumerate(zip(dfs, workload_names)):
        if df.empty or not all(col in df.columns for col in required_cols):
            print(f"Warning: Skipping workload '{workload_label}': DataFrame is empty or missing one of {list(required_cols)}.")
            continue
        plottable.append((position, workload_label, df))

    if not plottable:
        print("Error: None of the input DataFrames contain data to plot.")
        return

    # Keep the fixed ranges ([0, 5] and [0, 1.5]) as a floor so plots stay
    # comparable, but grow them when the data would otherwise be clipped.
    # The constant goes first in max() so a NaN peak falls back to the floor.
    speedup_peak = pd.to_numeric(pd.concat([df['speedup'] for _, _, df in plottable]), errors='coerce').max()
    efficiency_peak = pd.to_numeric(pd.concat([df['efficiency'] for _, _, df in plottable]), errors='coerce').max()
    speedup_y_range = [0.0, max(5.0, speedup_peak * 1.1)]
    efficiency_y_range = [0.0, max(1.5, efficiency_peak * 1.1)]

    # Get all unique thread counts across all DFs for x-axis ticks
    all_num_threads = sorted(pd.concat([df['num_threads'] for _, _, df in plottable]).unique())

    # --- Color and Marker Definition ---
    colors = px.colors.qualitative.Plotly # A good default palette
    markers = ['circle', 'square', 'diamond', 'cross', 'x', 'triangle-up', 'triangle-down', 'pentagon', 'hexagram']
    # --- End Color and Marker Definition ---

    def _workload_trace(position, workload_label, df, metric, title, show_in_legend):
        """Build the line for one workload and one metric column."""
        return go.Scatter(
            x=df['num_threads'],
            y=df[metric],
            mode='lines+markers',
            name=f'{title}: {workload_label}',
            marker=dict(symbol=markers[position % len(markers)], size=8),
            line=dict(width=2, color=colors[position % len(colors)]),
            legendgroup=workload_label, # Group traces for consistent legend
            showlegend=show_in_legend
        )

    def _reference_line(level, label, dash, color, group):
        """Build a flat reference trace at ``level`` spanning every thread count."""
        return go.Scatter(
            x=all_num_threads,
            y=[level] * len(all_num_threads),
            mode='lines',
            name=label,
            line=dict(dash=dash, color=color),
            showlegend=True,
            legendgroup=group
        )

    # Create subplots: one for Speedup, one for Efficiency
    fig = make_subplots(rows=1, cols=2,
                        subplot_titles=('Speedup vs. Number of Threads',
                                        'Efficiency vs. Number of Threads'))

    # Plot Speedup for each workload
    for position, workload_label, df in plottable:
        fig.add_trace(
            _workload_trace(position, workload_label, df, 'speedup', 'Speedup', True),
            row=1, col=1
        )

    # Add the 1x speedup reference line (only once)
    fig.add_trace(
        _reference_line(1, '(1x Speedup)', 'dash', 'gray', 'ideal_speedup'),
        row=1, col=1
    )

    # Plot Efficiency for each workload; legend entries are hidden here to
    # avoid duplication, as they are shown for Speedup.
    for position, workload_label, df in plottable:
        fig.add_trace(
            _workload_trace(position, workload_label, df, 'efficiency', 'Efficiency', False),
            row=1, col=2
        )

    # Add Ideal Efficiency lines (for reference, only once)
    fig.add_trace(
        _reference_line(1, 'Ideal Efficiency (100%)', 'dash', 'gray', 'ideal_efficiency_1'),
        row=1, col=2
    )
    fig.add_trace(
        _reference_line(0.5, '50% Efficiency', 'dot', 'orange', 'ideal_efficiency_05'),
        row=1, col=2
    )

    # Update layout for a beautiful graph
    fig.update_layout(
        title_text='Performance Scaling Across Workloads',
        height=600, width=1200,
        showlegend=True,
        hovermode="x unified",
        template="plotly_white",
        title_x=0.5,
    )

    # Both panels share the same x-axis configuration.
    for panel_col in (1, 2):
        fig.update_xaxes(
            title_text='Number of Threads',
            tickmode='array',
            tickvals=all_num_threads,
            row=1, col=panel_col
        )

    # Update y-axes for clarity and consistent ranges
    fig.update_yaxes(title_text='Speedup (X times faster)', range=speedup_y_range, row=1, col=1)
    fig.update_yaxes(title_text='Efficiency (Fraction of Ideal)', range=efficiency_y_range, row=1, col=2)

    fig.show()
    # You can also save it as an HTML file for interactive viewing later:
    # fig.write_html(f"performance_scaling_across_workloads.html")

In [10]:
# Module-level defaults. load_df below takes its own values from each config
# dict and its compression_on argument, so it does not read these three names.
num_keys = 50.0
value_size = 50
compression_on = True

# One benchmark configuration per workload, ordered by increasing value size.
# Each (num_keys, value_size) pair selects a results CSV by file name in load_df.
# NOTE(review): num_keys is presumably a count in millions — TODO confirm.
test_configs = [
    {"num_keys": key_count, "value_size": size_bytes}
    for key_count, size_bytes in (
        (100.0, 50),
        (40.0, 300),
        (40.0, 500),
        (5.0, 5000),
        (1.0, 20000),
        (0.1, 100000),
    )
]


def load_df(test_config: dict, compression_on: bool):
    """
    Reads the benchmark summary CSV that matches one test configuration.

    The file name is built from the config's "num_keys" and "value_size"
    entries and the compression flag, and is resolved relative to the current
    working directory.

    Args:
        test_config (dict): Must provide "num_keys" and "value_size".
        compression_on (bool): Selects the "comp-yes" file when true,
                               otherwise the "comp-no" file.

    Returns:
        pd.DataFrame: The contents of the CSV as parsed by pd.read_csv.
    """
    key_count = test_config["num_keys"]
    size_bytes = test_config["value_size"]
    if compression_on:
        compression_flag = "yes"
    else:
        compression_flag = "no"
    csv_name = f"save_summary_{key_count}keys_{size_bytes}B_comp-{compression_flag}_csum-yes.csv"
    return pd.read_csv(csv_name)


# Load every configured workload and add speedup/efficiency columns, once
# with compression enabled and once with it disabled.
dfs_compression_on = [
    calculate_speedup_and_efficiency(load_df(config, compression_on=True))
    for config in test_configs
]
dfs_compression_off = [
    calculate_speedup_and_efficiency(load_df(config, compression_on=False))
    for config in test_configs
]

# Legend labels are derived from test_configs so they cannot drift out of
# sync with the workloads actually loaded (e.g. "Value Size 5,000B").
all_workload_names = [f"Value Size {config['value_size']:,}B" for config in test_configs]

# Render the same pair of plots for each compression setting under a banner.
for section_banner, section_dfs in (
    ("                 COMPRESSION ON", dfs_compression_on),
    ("                 COMPRESSION OFF", dfs_compression_off),
):
    print("\n" + "=" * 50)  # Top border
    print(section_banner)
    print("=" * 50 + "\n")  # Bottom border

    plot_performance_by_workload_on_threads(section_dfs, all_workload_names)
    plot_combined_workload_performance(section_dfs)


                 COMPRESSION ON




                 COMPRESSION OFF



In [25]:
# Display the first compression-on frame, including its speedup/efficiency columns.
dfs_compression_on[0]

Unnamed: 0,keys,value_size,num_threads,rdbcompression,rdbchecksum,valkey_data_throughput_mb_s,actual_throughput_mb_s,rdb_file_size_bytes,status,port,save_duration_seconds,cpu_utilization_percent,cpu_total_time_seconds,io_read_bytes,io_write_bytes,memory_rss_bytes,context_switches_voluntary,context_switches_involuntary,speedup,efficiency
0,25000000,50,1,yes,yes,94.219473,98.502183,1725000110,ok,7000,17.512303,99.872641,17.49,0,1725005824,2763960320,34,260,1.0,1.0
1,25000000,50,2,yes,yes,202.334783,211.531832,1725000110,ok,7000,8.154802,186.761136,15.23,0,1725001728,2763960320,6779,82,2.147484,1.073742
2,25000000,50,4,yes,yes,336.365778,351.655154,1725000110,ok,7000,4.905374,327.396024,16.06,0,1725001728,2764484608,1924,40,3.570024,0.892506
3,25000000,50,6,yes,yes,375.541637,392.611737,1725000110,ok,7000,4.393654,380.093657,16.7,0,1725001728,2765008896,4515,21,3.985818,0.664303
4,25000000,50,8,yes,yes,360.693588,377.088775,1725000110,ok,7000,4.57452,363.316814,16.62,0,1725001728,2767106048,3307,21,3.828228,0.478528
5,25000000,50,10,yes,yes,352.702544,368.734502,1725000110,ok,7000,4.678163,357.191486,16.71,0,1725001728,2767630336,2631,14,3.743415,0.374341


In [26]:
# Display the first compression-off frame, including its speedup/efficiency columns.
dfs_compression_off[0]

Unnamed: 0,keys,value_size,num_threads,rdbcompression,rdbchecksum,valkey_data_throughput_mb_s,actual_throughput_mb_s,rdb_file_size_bytes,status,port,save_duration_seconds,cpu_utilization_percent,cpu_total_time_seconds,io_read_bytes,io_write_bytes,memory_rss_bytes,context_switches_voluntary,context_switches_involuntary,speedup,efficiency
0,25000000,50,1,no,yes,118.057425,123.42368,1725000110,ok,7000,13.976249,99.812187,13.95,0,1725005824,2762551296,30,212,1.0,1.0
1,25000000,50,2,no,yes,272.757708,285.155803,1725000110,ok,7000,6.049325,183.491549,11.1,0,1725001728,2762551296,3061,65,2.310382,1.155191
2,25000000,50,4,no,yes,383.138137,400.553532,1725000110,ok,7000,4.306541,275.395049,11.86,0,1725001728,2762551296,4213,13,3.245354,0.811338
3,25000000,50,6,no,yes,373.294173,390.262115,1725000110,ok,7000,4.420106,274.201538,12.12,0,1725001728,2762551296,4692,21,3.161971,0.526995
4,25000000,50,8,no,yes,360.742821,377.140246,1725000110,ok,7000,4.573896,266.075159,12.17,0,1725001728,2762551296,3431,16,3.055655,0.381957
5,25000000,50,10,no,yes,369.207798,385.989995,1725000110,ok,7000,4.469028,255.760311,11.43,0,1725001728,2762551296,2737,14,3.127358,0.312736


In [27]:
# Display the second compression-on frame.
# NOTE(review): the saved output below reports value_size=500, while
# test_configs[1] is the 300-byte workload — the output looks stale; re-run to confirm.
dfs_compression_on[1]

Unnamed: 0,keys,value_size,num_threads,rdbcompression,rdbchecksum,valkey_data_throughput_mb_s,actual_throughput_mb_s,rdb_file_size_bytes,status,port,save_duration_seconds,cpu_utilization_percent,cpu_total_time_seconds,io_read_bytes,io_write_bytes,memory_rss_bytes,context_switches_voluntary,context_switches_involuntary,speedup,efficiency
0,25000000,500,1,yes,yes,132.242122,133.264661,12999746974,ok,7000,97.548344,99.919687,97.47,0,12999753728,14579798016,76,1472,1.0,1.0
1,25000000,500,2,yes,yes,360.498344,363.285834,12999746974,ok,7000,35.783798,181.478779,64.94,0,12999753728,14579798016,15671,276,2.726048,1.363024
2,25000000,500,4,yes,yes,399.065894,402.151601,12999746974,ok,7000,32.325489,217.661057,70.36,0,12999753728,14580322304,57746,118,3.017691,0.754423
3,25000000,500,6,yes,yes,398.686766,401.769541,12999746974,ok,7000,32.356228,230.496581,74.58,0,12999749632,14581370880,35458,112,3.014824,0.502471
4,25000000,500,8,yes,yes,399.148816,402.235164,12999746974,ok,7000,32.318773,235.683452,76.17,0,12999753728,14582943744,26648,51,3.018318,0.37729
5,25000000,500,10,yes,yes,398.86489,401.949042,12999746974,ok,7000,32.341779,241.390558,78.07,0,12999753728,14583992320,21268,56,3.016171,0.301617


In [28]:
# Display the second compression-off frame.
# NOTE(review): the saved output below reports value_size=500, while
# test_configs[1] is the 300-byte workload — the output looks stale; re-run to confirm.
dfs_compression_off[1]

Unnamed: 0,keys,value_size,num_threads,rdbcompression,rdbchecksum,valkey_data_throughput_mb_s,actual_throughput_mb_s,rdb_file_size_bytes,status,port,save_duration_seconds,cpu_utilization_percent,cpu_total_time_seconds,io_read_bytes,io_write_bytes,memory_rss_bytes,context_switches_voluntary,context_switches_involuntary,speedup,efficiency
0,25000000,500,1,no,yes,200.103249,201.654439,13000000111,ok,7000,64.466719,99.912018,64.41,0,13000007680,14577467392,85,908,1.0,1.0
1,25000000,500,2,no,yes,399.848469,402.948073,13000000111,ok,7000,32.262222,104.859486,33.83,0,13000003584,14577467392,117553,153,1.998211,0.999105
2,25000000,500,4,no,yes,399.314447,402.409912,13000000111,ok,7000,32.305368,109.919814,35.51,0,13000007680,14577467392,57261,60,1.995542,0.498886
3,25000000,500,6,no,yes,399.304046,402.39943,13000000111,ok,7000,32.306209,112.826608,36.45,0,13000007680,14577467392,36868,45,1.99549,0.332582
4,25000000,500,8,no,yes,391.916725,394.954843,13000000111,ok,7000,32.915156,119.975128,39.49,0,13000007680,14577467392,25691,58,1.958573,0.244822
5,25000000,500,10,no,yes,397.47385,400.555046,13000000111,ok,7000,32.454965,121.214118,39.34,0,13000003584,14577991680,20046,45,1.986344,0.198634


In [1]:
# Scratch check: rdb_file_size_bytes / save_duration_seconds for the 1-thread
# row of the compression-off 500-byte output above. The result in bytes/s
# lines up with that row's actual_throughput_mb_s (201.654439).
# NOTE(review): this cell's execution count (In [1]) is out of sequence with its neighbours.
13000000111/64.466719

201654439.88238335

In [29]:
# Display the third compression-on frame.
# NOTE(review): the saved output below reports value_size=5000, while
# test_configs[2] is the 500-byte workload — the output looks stale; re-run to confirm.
dfs_compression_on[2]

Unnamed: 0,keys,value_size,num_threads,rdbcompression,rdbchecksum,valkey_data_throughput_mb_s,actual_throughput_mb_s,rdb_file_size_bytes,status,port,save_duration_seconds,cpu_utilization_percent,cpu_total_time_seconds,io_read_bytes,io_write_bytes,memory_rss_bytes,context_switches_voluntary,context_switches_involuntary,speedup,efficiency
0,5000000,5000,1,yes,yes,125.589138,112.973646,22560701466,ok,7000,199.698799,99.940511,199.58,0,22560706560,26104676352,181,2584,1.0,1.0
1,5000000,5000,2,yes,yes,294.755744,265.147382,22560701466,ok,7000,85.0874,187.724621,159.73,0,22560706560,26105200640,21349,617,2.346984,1.173492
2,5000000,5000,4,yes,yes,447.265043,402.337046,22560701466,ok,7000,56.074134,291.810123,163.63,0,22560706560,26105724928,42118,190,3.561335,0.890334
3,5000000,5000,6,yes,yes,446.751742,401.875306,22560701466,ok,7000,56.138561,306.812281,172.24,0,22560706560,26106249216,64335,119,3.557248,0.592875
4,5000000,5000,8,yes,yes,445.011956,400.310282,22560701466,ok,7000,56.358036,317.541226,178.96,0,22560706560,26106249216,47425,100,3.543395,0.442924
5,5000000,5000,10,yes,yes,443.025832,398.523666,22560701466,ok,7000,56.610694,321.352709,181.92,0,22560706560,26107822080,35055,108,3.527581,0.352758


In [30]:
# Display the third compression-off frame.
# NOTE(review): the saved output below reports value_size=5000, while
# test_configs[2] is the 500-byte workload — the output looks stale; re-run to confirm.
dfs_compression_off[2]

Unnamed: 0,keys,value_size,num_threads,rdbcompression,rdbchecksum,valkey_data_throughput_mb_s,actual_throughput_mb_s,rdb_file_size_bytes,status,port,save_duration_seconds,cpu_utilization_percent,cpu_total_time_seconds,io_read_bytes,io_write_bytes,memory_rss_bytes,context_switches_voluntary,context_switches_involuntary,speedup,efficiency
0,5000000,5000,1,no,yes,296.57728,296.813786,25100000111,ok,7000,84.564806,99.935191,84.51,0,25100005376,26105221120,134,1232,1.0,1.0
1,5000000,5000,2,no,yes,397.110684,397.427361,25100000111,ok,7000,63.156195,84.74228,53.52,0,25100005376,26105221120,215311,252,1.338979,0.669489
2,5000000,5000,4,no,yes,401.345099,401.665153,25100000111,ok,7000,62.489862,111.346061,69.58,0,25100005376,26105221120,113780,169,1.353256,0.338314
3,5000000,5000,6,no,yes,402.032707,402.353309,25100000111,ok,7000,62.382984,113.636757,70.89,0,25100005376,26105221120,74271,94,1.355575,0.225929
4,5000000,5000,8,no,yes,401.772393,402.092787,25100000111,ok,7000,62.423403,146.067013,91.18,0,25100005376,26105221120,62910,94,1.354697,0.169337
5,5000000,5000,10,no,yes,402.127404,402.448082,25100000111,ok,7000,62.368294,151.006216,94.18,0,25100005376,26105745408,43111,93,1.355894,0.135589
