In [1]:
import pandas as pd

In [2]:
def calculate_speedup_and_efficiency(df: pd.DataFrame, num_threads_column: str = 'num_threads', duration_column: str = 'save_duration_seconds') -> pd.DataFrame:
    """
    Calculates speedup and efficiency for a DataFrame of benchmark results.

    The first row whose thread count equals 1 is used as the baseline:

        speedup    = baseline duration / row duration
        efficiency = speedup / row thread count

    The input frame is never modified; the metrics are written to a copy.

    Args:
        df (pd.DataFrame): The input DataFrame containing benchmark results.
                          Must have columns for thread count and duration.
        num_threads_column (str): The name of the column containing the number of threads.
        duration_column (str): The name of the column containing the duration (runtime) in seconds.

    Returns:
        pd.DataFrame: A copy of ``df`` with 'speedup' and 'efficiency' columns added.
                      An empty DataFrame is returned (after printing a message) when
                      ``df`` is empty, when no single-thread baseline row exists, or
                      when the baseline duration is zero.
    """
    if df.empty:
        print("Warning: Input DataFrame is empty.")
        return pd.DataFrame()

    # Get the baseline runtime (1 thread); if several rows match, the first wins.
    baseline_rows = df[df[num_threads_column] == 1]
    if baseline_rows.empty:
        print(f"Error: Baseline (1 {num_threads_column}) not found in the DataFrame. Cannot calculate speedup.")
        return pd.DataFrame()

    baseline_runtime = baseline_rows[duration_column].iloc[0]

    if baseline_runtime == 0:
        print("Warning: Baseline runtime is zero. Speedup and efficiency calculations will result in division by zero.")
        return pd.DataFrame()

    # Work on a copy so the caller's frame keeps its original columns and so
    # that passing a slice of another frame does not write through to it.
    result = df.copy()

    # Speedup = Runtime (1 thread) / Runtime (P threads)
    result['speedup'] = baseline_runtime / result[duration_column]

    # Efficiency = Speedup / Number of threads
    result['efficiency'] = result['speedup'] / result[num_threads_column]

    return result

In [3]:
def extract_efficiency_and_speedup_at_threads(dfs: list[pd.DataFrame], threads = 2):
    """
    Collect the speedup and efficiency measured at one thread count.

    For every frame in ``dfs`` the first row whose 'num_threads' equals
    ``threads`` is picked, and its 'value_size', 'efficiency' and 'speedup'
    values become one row of the result.

    Frames that have no 'num_threads' column (for example an empty frame) or
    no row for ``threads`` are skipped with a printed warning instead of
    raising IndexError/KeyError.

    Args:
        dfs (list[pd.DataFrame]): Frames containing 'num_threads', 'value_size',
                                  'speedup' and 'efficiency' columns.
        threads: The thread count to extract.

    Returns:
        pd.DataFrame: Columns 'value_size', 'efficiency_at_{threads}_threads'
                      and 'speedup_at_{threads}_threads', sorted by
                      'value_size' with a fresh 0..n-1 index. Empty (but with
                      those columns) when nothing could be extracted.
    """
    efficiency_key = f'efficiency_at_{threads}_threads'
    speedup_key = f'speedup_at_{threads}_threads'

    extracted_data = []
    for position, df in enumerate(dfs):
        if 'num_threads' not in df.columns:
            print(f"Warning: DataFrame #{position} has no 'num_threads' column; skipping.")
            continue

        df_at_threads = df[df['num_threads'] == threads]
        if df_at_threads.empty:
            print(f"Warning: DataFrame #{position} has no row for {threads} threads; skipping.")
            continue

        # Read each value from its own column so the column dtype is kept
        # (taking the whole row first would upcast integers to float).
        extracted_data.append({
            'value_size': df_at_threads['value_size'].iloc[0],
            efficiency_key: df_at_threads['efficiency'].iloc[0],
            speedup_key: df_at_threads['speedup'].iloc[0],
        })

    # Naming the columns explicitly keeps sort_values working when no row was extracted.
    result_df = pd.DataFrame(extracted_data, columns=['value_size', efficiency_key, speedup_key])
    result_df = result_df.sort_values(by='value_size').reset_index(drop=True)

    return result_df

In [4]:
import pandas as pd
import plotly.graph_objects as go
from plotly.subplots import make_subplots

def plot_workload_performance(df_metrics: pd.DataFrame, threads: int = 2):
    """
    Display speedup and efficiency against value size for one thread count.

    Builds a 1x2 Plotly subplot figure (speedup on the left, efficiency on the
    right), adds dashed/dotted reference lines, and shows it with ``fig.show()``.
    When the frame is empty or a required column is missing, an error is
    printed and nothing is plotted.

    Args:
        df_metrics (pd.DataFrame): Must contain 'value_size',
                                   'efficiency_at_{threads}_threads' and
                                   'speedup_at_{threads}_threads'. Typically
                                   the output of
                                   extract_efficiency_and_speedup_at_threads.
        threads (int): Thread count the metrics belong to; used to build the
                       column names and the titles.
    """
    if df_metrics.empty:
        print("Error: Input DataFrame is empty. Cannot generate plot.")
        return

    efficiency_col = f'efficiency_at_{threads}_threads'
    speedup_col = f'speedup_at_{threads}_threads'
    required_cols = ['value_size', efficiency_col, speedup_col]

    if any(col not in df_metrics.columns for col in required_cols):
        print(f"Error: DataFrame must contain '{required_cols}' columns for plotting.")
        return

    sizes = df_metrics['value_size']
    point_count = len(df_metrics)

    def reference_line(level, label, line_style):
        # Horizontal guide drawn at a constant y across every value size.
        return go.Scatter(
            x=sizes,
            y=[level] * point_count,
            mode='lines',
            name=label,
            line=line_style,
            showlegend=True
        )

    gray_dash = dict(dash='dash', color='gray')

    fig = make_subplots(
        rows=1, cols=2,
        subplot_titles=(
            f'Speedup at {threads} Threads vs. Value Size',
            f'Efficiency at {threads} Threads vs. Value Size',
        ),
    )

    # Left panel: measured speedup plus the 1x guide.
    fig.add_trace(
        go.Scatter(
            x=sizes,
            y=df_metrics[speedup_col],
            mode='lines+markers',
            name=f'Speedup at {threads} Threads',
            marker=dict(symbol='circle', size=8),
            line=dict(width=2)
        ),
        row=1, col=1
    )
    fig.add_trace(reference_line(1, 'Ideal (1x Speedup)', gray_dash), row=1, col=1)

    # Right panel: measured efficiency plus the 100% and 50% guides.
    fig.add_trace(
        go.Scatter(
            x=sizes,
            y=df_metrics[efficiency_col],
            mode='lines+markers',
            name=f'Efficiency at {threads} Threads',
            marker=dict(symbol='diamond', size=8),
            line=dict(width=2)
        ),
        row=1, col=2
    )
    fig.add_trace(reference_line(1, 'Ideal Efficiency (100%)', gray_dash), row=1, col=2)
    fig.add_trace(
        reference_line(0.5, '50% Efficiency', dict(dash='dot', color='orange')),
        row=1, col=2
    )

    fig.update_layout(
        title_text=f'Performance Scaling with Workload (at {threads} Threads)',
        height=600, width=1200,
        showlegend=True,
        hovermode="x unified",   # one hover box listing every trace at that x
        template="plotly_white",
        title_x=0.5,             # centre the main title
    )

    # Both panels share the same log-scaled x-axis with one tick per value size.
    tick_values = sizes.tolist()
    tick_labels = [f'{s}B' for s in tick_values]
    for panel_col in (1, 2):
        fig.update_xaxes(
            title_text='Value Size (Bytes)',
            type='log',
            tickmode='array',
            tickvals=tick_values,
            ticktext=tick_labels,
            row=1, col=panel_col
        )

    # Fixed y-ranges keep separate figures visually comparable:
    # 0x-5x for speedup, 0-150% for efficiency (headroom for superlinear runs).
    fig.update_yaxes(title_text='Speedup (X times faster)', range=[0.0, 5.0], row=1, col=1)
    fig.update_yaxes(title_text='Efficiency (Fraction of Ideal)', range=[0.0, 1.5], row=1, col=2)

    fig.show()
    # To keep an interactive copy on disk:
    # fig.write_html(f"performance_scaling_at_{threads}_threads.html")

In [5]:
import pandas as pd
import plotly.graph_objects as go
from plotly.subplots import make_subplots

def plot_single_df_performance(df: pd.DataFrame, workload_description: str = "Performance Metrics"):
    """
    Display speedup and efficiency against thread count for a single frame.

    Builds a 1x2 Plotly subplot figure (speedup left, efficiency right) with
    fixed y-axis ranges and reference lines, then shows it with ``fig.show()``.
    When the frame is empty or lacks a required column, an error is printed
    and nothing is plotted.

    Args:
        df (pd.DataFrame): Must contain 'num_threads', 'speedup' and
                           'efficiency' columns.
        workload_description (str): Text placed at the start of the figure
                                    title, e.g.
                                    "Value Size 50 Bytes, No Compression".
    """
    needed_cols = ('num_threads', 'speedup', 'efficiency')
    if df.empty or any(col not in df.columns for col in needed_cols):
        print("Error: DataFrame must contain 'num_threads', 'speedup', and 'efficiency' columns for plotting.")
        return

    thread_axis = df['num_threads']
    row_count = len(df)

    def reference_line(level, label, line_style):
        # Horizontal guide drawn at a constant y across every thread count.
        return go.Scatter(
            x=thread_axis,
            y=[level] * row_count,
            mode='lines',
            name=label,
            line=line_style,
            showlegend=True
        )

    gray_dash = dict(dash='dash', color='gray')

    fig = make_subplots(rows=1, cols=2, subplot_titles=('Speedup', 'Efficiency'))

    # Left panel: measured speedup plus the constant 1x guide.
    fig.add_trace(
        go.Scatter(
            x=thread_axis,
            y=df['speedup'],
            mode='lines+markers',
            name='Speedup',
            marker=dict(symbol='circle', size=8),
            line=dict(width=2, color='royalblue')
        ),
        row=1, col=1
    )
    fig.add_trace(reference_line(1, 'Ideal Speedup', gray_dash), row=1, col=1)

    # Right panel: measured efficiency plus the 100% and 50% guides.
    fig.add_trace(
        go.Scatter(
            x=thread_axis,
            y=df['efficiency'],
            mode='lines+markers',
            name='Efficiency',
            marker=dict(symbol='diamond', size=8),
            line=dict(width=2, color='firebrick')
        ),
        row=1, col=2
    )
    fig.add_trace(reference_line(1, 'Ideal Efficiency (100%)', gray_dash), row=1, col=2)
    fig.add_trace(
        reference_line(0.5, '50% Efficiency', dict(dash='dot', color='orange')),
        row=1, col=2
    )

    fig.update_layout(
        title_text=f'{workload_description} vs. Number of Threads',
        height=600, width=1200,
        showlegend=True,
        hovermode="x unified",   # one hover box listing every trace at that x
        template="plotly_white",
        title_x=0.5,             # centre the main title
    )

    # One tick per distinct thread count, identical on both panels.
    for panel_col in (1, 2):
        fig.update_xaxes(
            title_text='Number of Threads',
            tickmode='array',
            tickvals=df['num_threads'].unique(),
            row=1, col=panel_col
        )

    # Fixed y-ranges keep figures for different workloads comparable:
    # 0x-5x for speedup, 0-150% for efficiency (headroom for superlinear runs).
    # Widen them here if a dataset exceeds these bounds.
    fig.update_yaxes(title_text='Speedup (X times faster)', range=[0.0, 5.0], row=1, col=1)
    fig.update_yaxes(title_text='Efficiency (Fraction of Ideal)', range=[0.0, 1.5], row=1, col=2)

    fig.show()
    # To keep an interactive copy on disk:
    # fig.write_html(f"performance_metrics_{workload_description.replace(' ', '_').replace('.', '')}.html")

In [6]:
import pandas as pd
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import plotly.express as px # Import plotly.express for its color sequences

def plot_combined_workload_performance(dfs: list[pd.DataFrame], workload_type_description: str = "Overall Performance"):
    """
    Display speedup and efficiency against value size, one line per thread count.

    All frames are concatenated, 'speedup' and 'efficiency' are coerced to
    numbers (rows where either cannot be parsed are dropped), and a 1x2 Plotly
    subplot figure is built: speedup on the left, efficiency on the right.
    Each thread count gets its own colour and marker, shared between the two
    panels. The figure is shown with ``fig.show()``. When ``dfs`` is empty or a
    required column is missing, an error is printed and nothing is plotted.

    Args:
        dfs (list[pd.DataFrame]): Frames containing 'num_threads',
                                   'value_size', 'speedup' and 'efficiency'
                                   columns; the metrics are expected to be
                                   computed already.
        workload_type_description (str): Used verbatim as the figure title,
                                         e.g. "with Compression ON".
    """
    if not dfs:
        print("Error: Input list of DataFrames is empty. Cannot generate plot.")
        return

    # One long frame is easier to slice per thread count than a list of frames.
    stacked = pd.concat(dfs, ignore_index=True)

    required_cols = ['num_threads', 'value_size', 'speedup', 'efficiency']
    if any(col not in stacked.columns for col in required_cols):
        print(f"Error: Combined DataFrame must contain '{required_cols}' columns for plotting.")
        return

    # Force the metrics to numeric and discard rows that could not be parsed.
    metric_cols = ['speedup', 'efficiency']
    for metric in metric_cols:
        stacked[metric] = pd.to_numeric(stacked[metric], errors='coerce')
    stacked = stacked.dropna(subset=metric_cols)

    # y-ranges: at least 0-5x / 0-1.1, stretched to 110% of the data maximum.
    speedup_y_range = [0.0, max(5.0, stacked['speedup'].max() * 1.1)]
    efficiency_y_range = [0.0, max(1.1, stacked['efficiency'].max() * 1.1)]

    thread_counts = sorted(stacked['num_threads'].unique())   # one line each
    value_sizes = sorted(stacked['value_size'].unique())      # x-axis ticks

    # Assign every thread count a (colour, marker) pair, cycling when the
    # palette or the symbol list runs out.
    palette = px.colors.qualitative.Plotly
    symbols = ['circle', 'square', 'diamond', 'cross', 'x', 'triangle-up', 'triangle-down', 'pentagon', 'hexagram']
    style_by_thread = {
        thread: (palette[idx % len(palette)], symbols[idx % len(symbols)])
        for idx, thread in enumerate(thread_counts)
    }

    fig = make_subplots(
        rows=1, cols=2,
        subplot_titles=('Speedup vs. Value Size', 'Efficiency vs. Value Size'),
    )

    def add_thread_traces(metric, panel_col, in_legend):
        # One lines+markers trace per thread count for the given metric column.
        for thread in thread_counts:
            rows = stacked[stacked['num_threads'] == thread]
            colour, symbol = style_by_thread[thread]
            fig.add_trace(
                go.Scatter(
                    x=rows['value_size'],
                    y=rows[metric],
                    mode='lines+markers',
                    name=f'{thread} Threads',
                    legendgroup=str(thread),   # ties the two panels' traces together
                    showlegend=in_legend,
                    marker=dict(symbol=symbol, size=8),
                    line=dict(width=2, color=colour)
                ),
                row=1, col=panel_col
            )

    def add_reference(level, label, line_style, group, panel_col):
        # Horizontal guide drawn at a constant y across every value size.
        fig.add_trace(
            go.Scatter(
                x=value_sizes,
                y=[level] * len(value_sizes),
                mode='lines',
                name=label,
                line=line_style,
                showlegend=True,
                legendgroup=group
            ),
            row=1, col=panel_col
        )

    gray_dash = dict(dash='dash', color='gray')

    # Left panel: speedup. These traces own the per-thread legend entries.
    add_thread_traces('speedup', 1, True)
    add_reference(1, 'Ideal (1x Speedup)', gray_dash, 'ideal_speedup', 1)

    # Right panel: efficiency. Legend entries are hidden to avoid duplicates.
    add_thread_traces('efficiency', 2, False)
    add_reference(1, 'Ideal Efficiency (100%)', gray_dash, 'ideal_efficiency_1', 2)
    add_reference(0.5, '50% Efficiency', dict(dash='dot', color='orange'), 'ideal_efficiency_05', 2)

    fig.update_layout(
        title_text=f'{workload_type_description}',
        height=600, width=1200,
        showlegend=True,
        hovermode="x unified",
        template="plotly_white",
        title_x=0.5,
    )

    # Both panels share the same log-scaled x-axis with one tick per value size.
    size_labels = [f'{s}B' for s in value_sizes]
    for panel_col in (1, 2):
        fig.update_xaxes(
            title_text='Value Size (Bytes)',
            type='log',
            tickmode='array',
            tickvals=value_sizes,
            ticktext=size_labels,
            row=1, col=panel_col
        )

    fig.update_yaxes(title_text='Speedup (X times faster)', range=speedup_y_range, row=1, col=1)
    fig.update_yaxes(title_text='Efficiency (Fraction of Ideal)', range=efficiency_y_range, row=1, col=2)

    fig.show()
    # fig.write_html(f"combined_performance_scaling_{workload_type_description.replace(' ', '_').replace('.', '')}.html")

In [7]:
import pandas as pd
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import plotly.express as px # For distinct colors and markers

def plot_performance_by_workload_on_threads(dfs: list[pd.DataFrame], workload_names: list[str]):
    """
    Display speedup and efficiency against thread count, one line per workload.

    Builds a 1x2 Plotly subplot figure (speedup left, efficiency right) in
    which the i-th frame is drawn with the i-th colour/marker and labelled
    with the i-th workload name, then shows it with ``fig.show()``. When
    either list is empty or their lengths differ, an error is printed and
    nothing is plotted.

    Args:
        dfs (list[pd.DataFrame]): One frame per workload; 'num_threads',
                                   'speedup' and 'efficiency' are read from
                                   each, so the metrics must be computed
                                   already.
        workload_names (list[str]): Legend label for each frame, in the same
                                     order as ``dfs`` (e.g.
                                     ["50 Bytes", "500 Bytes"]).
    """
    inputs_ok = bool(dfs) and bool(workload_names) and len(dfs) == len(workload_names)
    if not inputs_ok:
        print("Error: Input lists 'dfs' and 'workload_names' must not be empty and must have matching lengths.")
        return

    # Every thread count seen in any frame; used for ticks and guide lines.
    all_num_threads = sorted(pd.concat([df['num_threads'] for df in dfs]).unique())

    palette = px.colors.qualitative.Plotly
    symbols = ['circle', 'square', 'diamond', 'cross', 'x', 'triangle-up', 'triangle-down', 'pentagon', 'hexagram']

    fig = make_subplots(
        rows=1, cols=2,
        subplot_titles=('Speedup vs. Number of Threads', 'Efficiency vs. Number of Threads'),
    )

    def add_workload_traces(metric, name_prefix, panel_col, in_legend):
        # One lines+markers trace per workload; colours and markers cycle by position.
        for idx, (df, label) in enumerate(zip(dfs, workload_names)):
            fig.add_trace(
                go.Scatter(
                    x=df['num_threads'],
                    y=df[metric],
                    mode='lines+markers',
                    name=f'{name_prefix}: {label}',
                    marker=dict(symbol=symbols[idx % len(symbols)], size=8),
                    line=dict(width=2, color=palette[idx % len(palette)]),
                    legendgroup=label,   # ties the two panels' traces together
                    showlegend=in_legend
                ),
                row=1, col=panel_col
            )

    def add_reference(level, label, line_style, group, panel_col):
        # Horizontal guide drawn at a constant y across every thread count.
        fig.add_trace(
            go.Scatter(
                x=all_num_threads,
                y=[level] * len(all_num_threads),
                mode='lines',
                name=label,
                line=line_style,
                showlegend=True,
                legendgroup=group
            ),
            row=1, col=panel_col
        )

    gray_dash = dict(dash='dash', color='gray')

    # Left panel: speedup. These traces own the per-workload legend entries.
    add_workload_traces('speedup', 'Speedup', 1, True)
    add_reference(1, '(1x Speedup)', gray_dash, 'ideal_speedup', 1)

    # Right panel: efficiency. Legend entries are hidden to avoid duplicates.
    add_workload_traces('efficiency', 'Efficiency', 2, False)
    add_reference(1, 'Ideal Efficiency (100%)', gray_dash, 'ideal_efficiency_1', 2)
    add_reference(0.5, '50% Efficiency', dict(dash='dot', color='orange'), 'ideal_efficiency_05', 2)

    fig.update_layout(
        title_text='Performance Scaling Across Workloads',
        height=600, width=1200,
        showlegend=True,
        hovermode="x unified",
        template="plotly_white",
        title_x=0.5,
    )

    # One tick per thread count, identical on both panels.
    for panel_col in (1, 2):
        fig.update_xaxes(
            title_text='Number of Threads',
            tickmode='array',
            tickvals=all_num_threads,
            row=1, col=panel_col
        )

    # Fixed y-ranges keep separate figures comparable:
    # 0x-5x for speedup, 0-150% for efficiency (headroom for superlinear runs).
    fig.update_yaxes(title_text='Speedup (X times faster)', range=[0.0, 5.0], row=1, col=1)
    fig.update_yaxes(title_text='Efficiency (Fraction of Ideal)', range=[0.0, 1.5], row=1, col=2)

    fig.show()
    # To keep an interactive copy on disk:
    # fig.write_html(f"performance_scaling_across_workloads.html")

In [19]:
# Stand-alone defaults; load_df takes its values from test_configs instead.
num_keys = 50.0
value_size = 50
compression_on = True

# One entry per benchmarked workload, ordered by increasing value size.
# Both numbers are substituted verbatim into the CSV file name by load_df.
# NOTE(review): num_keys looks like it is expressed in millions of keys
# (the 100.0 / 50 B run reports keys=100000000) - confirm.
test_configs = [
    {"num_keys": keys, "value_size": size}
    for keys, size in (
        (100.0, 50),
        (40.0, 300),
        (40.0, 500),
        (5.0, 5000),
        (1.0, 20000),
        (0.1, 100000),
    )
]

def load_df(test_config: dict, compression_on: bool):
    """
    Read the benchmark summary CSV that belongs to one workload.

    Args:
        test_config (dict): Mapping with "num_keys" and "value_size" entries.
                            Both values are interpolated verbatim into the file name.
        compression_on (bool): Truthy selects the "comp-yes" file, falsy the "comp-no" file.

    Returns:
        pd.DataFrame: The result of pd.read_csv on
                      save_summary_<num_keys>keys_<value_size>B_comp-<yes|no>_csum-yes.csv,
                      resolved relative to the current working directory.
    """
    num_keys, value_size = test_config["num_keys"], test_config["value_size"]

    # The file name encodes the compression setting as a yes/no token.
    if compression_on:
        compression_str = "yes"
    else:
        compression_str = "no"

    return pd.read_csv(f"save_summary_{num_keys}keys_{value_size}B_comp-{compression_str}_csum-yes.csv")


# Load every workload's summary CSV for each compression mode and attach the
# 'speedup' / 'efficiency' columns. List order follows test_configs, so index i
# in either list corresponds to test_configs[i].
dfs_compression_on = [
    calculate_speedup_and_efficiency(load_df(config, compression_on=True))
    for config in test_configs
]
dfs_compression_off = [
    calculate_speedup_and_efficiency(load_df(config, compression_on=False))
    for config in test_configs
]

# Derive the plot labels from test_configs instead of maintaining a parallel
# hand-written list, so the labels cannot drift out of sync with the workloads.
# The ',' format option inserts thousands separators (5000 -> "5,000").
all_workload_names = [f"Value Size {config['value_size']:,}B" for config in test_configs]

# Emit the same banner + pair of figures for each compression mode.
for section_title, section_dfs in (
    ("                 COMPRESSION ON", dfs_compression_on),
    ("                 COMPRESSION OFF", dfs_compression_off),
):
    print("\n" + "="*50) # Top border
    print(section_title)
    print("="*50 + "\n") # Bottom border

    plot_performance_by_workload_on_threads(section_dfs, all_workload_names)
    plot_combined_workload_performance(section_dfs)
        


                 COMPRESSION ON




                 COMPRESSION OFF



In [9]:
# Compression ON, test_configs[0] (num_keys=100.0, value_size=50):
# one row per thread count, including the added speedup/efficiency columns.
dfs_compression_on[0]

Unnamed: 0,keys,value_size,num_threads,rdbcompression,rdbchecksum,valkey_data_throughput_mb_s,actual_throughput_mb_s,rdb_file_size_bytes,status,port,save_duration_seconds,cpu_utilization_percent,cpu_total_time_seconds,io_read_bytes,io_write_bytes,memory_rss_bytes,context_switches_voluntary,context_switches_involuntary,speedup,efficiency
0,100000000,50,1,yes,yes,116.042572,121.317236,6900000111,ok,7000,56.875678,99.954852,56.85,0,0,11030614016,1,829,1.0,1.0
1,100000000,50,2,yes,yes,212.926992,222.605495,6900000111,ok,7000,30.99654,189.021098,58.59,0,0,11031138304,119458,306,1.834904,0.917452
2,100000000,50,3,yes,yes,222.556591,232.672803,6900000111,ok,7000,29.655379,247.611067,73.43,0,0,11032186880,442868,110,1.917887,0.639296
3,100000000,50,4,yes,yes,223.142584,233.285432,6900000111,ok,7000,29.577501,263.544915,77.95,0,0,11032186880,363605,64,1.922937,0.480734
4,100000000,50,6,yes,yes,208.822833,218.314783,6900000111,ok,7000,31.605739,260.364105,82.29,0,0,11032711168,244343,38,1.799536,0.299923
5,100000000,50,8,yes,yes,116.892689,122.205995,6900000111,ok,7000,56.462043,161.47131,91.17,0,0,11033759744,181657,52,1.007326,0.125916
6,100000000,50,10,yes,yes,112.45129,117.562714,6900000111,ok,7000,58.692079,158.249633,92.88,0,0,11036381184,151677,30,0.969052,0.096905


In [10]:
# Compression OFF, test_configs[0] (num_keys=100.0, value_size=50):
# one row per thread count, including the added speedup/efficiency columns.
dfs_compression_off[0]

Unnamed: 0,keys,value_size,num_threads,rdbcompression,rdbchecksum,valkey_data_throughput_mb_s,actual_throughput_mb_s,rdb_file_size_bytes,status,port,save_duration_seconds,cpu_utilization_percent,cpu_total_time_seconds,io_read_bytes,io_write_bytes,memory_rss_bytes,context_switches_voluntary,context_switches_involuntary,speedup,efficiency
0,100000000,50,1,no,yes,146.832364,153.506565,6900000111,ok,7000,44.949218,99.934997,44.92,0,0,11029032960,1,663,1.0,1.0
1,100000000,50,2,no,yes,154.96838,162.012399,6900000111,ok,7000,42.589333,146.77384,62.51,0,0,11029032960,812053,173,1.05541,0.527705
2,100000000,50,3,no,yes,162.631378,170.023716,6900000111,ok,7000,40.582574,154.475168,62.69,0,0,11029557248,551041,90,1.107599,0.3692
3,100000000,50,4,no,yes,164.706155,172.192801,6900000111,ok,7000,40.071362,160.763189,64.42,0,0,11029557248,396448,63,1.121729,0.280432
4,100000000,50,6,no,yes,155.000592,162.046076,6900000111,ok,7000,42.580483,163.4082,69.58,0,0,11029557248,260872,58,1.05563,0.175938
5,100000000,50,8,no,yes,148.89496,155.662915,6900000111,ok,7000,44.326551,183.637117,81.4,0,0,11029557248,209331,37,1.014047,0.126756
6,100000000,50,10,no,yes,148.142423,154.876172,6900000111,ok,7000,44.551722,183.741496,81.86,0,0,11029557248,150491,33,1.008922,0.100892


In [11]:
# Compression ON, test_configs[1] (num_keys=40.0, value_size=300):
# one row per thread count, including the added speedup/efficiency columns.
dfs_compression_on[1]

Unnamed: 0,keys,value_size,num_threads,rdbcompression,rdbchecksum,valkey_data_throughput_mb_s,actual_throughput_mb_s,rdb_file_size_bytes,status,port,save_duration_seconds,cpu_utilization_percent,cpu_total_time_seconds,io_read_bytes,io_write_bytes,memory_rss_bytes,context_switches_voluntary,context_switches_involuntary,speedup,efficiency
0,40000000,300,1,yes,yes,152.36071,154.289298,12799997602,ok,7000,82.961021,99.950554,82.92,0,0,15175131136,1,1175,1.0,1.0
1,40000000,300,2,yes,yes,161.532231,163.576913,12799997602,ok,7000,78.250637,152.216013,119.11,0,0,15176179712,1466730,384,1.060196,0.530098
2,40000000,300,3,yes,yes,169.633673,171.780903,12799997602,ok,7000,74.513508,159.890473,119.14,0,0,15176179712,995994,192,1.113369,0.371123
3,40000000,300,4,yes,yes,165.691304,167.788631,12799997602,ok,7000,76.286442,163.240016,124.53,0,0,15177228288,726876,159,1.087494,0.271873
4,40000000,300,6,yes,yes,156.031543,158.006596,12799997602,ok,7000,81.009261,164.549087,133.3,0,0,15178276864,469007,88,1.024093,0.170682
5,40000000,300,8,yes,yes,126.626489,128.229332,12799997602,ok,7000,99.821136,155.428004,155.15,0,0,15178801152,348369,114,0.831097,0.103887
6,40000000,300,10,yes,yes,123.077672,124.635594,12799997602,ok,7000,102.699375,171.597928,176.23,0,0,15180374016,275042,88,0.807805,0.08078


In [12]:
# Compression OFF, test_configs[1] (num_keys=40.0, value_size=300):
# one row per thread count, including the added speedup/efficiency columns.
dfs_compression_off[1]

Unnamed: 0,keys,value_size,num_threads,rdbcompression,rdbchecksum,valkey_data_throughput_mb_s,actual_throughput_mb_s,rdb_file_size_bytes,status,port,save_duration_seconds,cpu_utilization_percent,cpu_total_time_seconds,io_read_bytes,io_write_bytes,memory_rss_bytes,context_switches_voluntary,context_switches_involuntary,speedup,efficiency
0,40000000,300,1,no,yes,231.267927,234.195371,12800000111,ok,7000,54.655223,99.935555,54.62,0,0,15173292032,1,764,1.0,1.0
1,40000000,300,2,no,yes,269.346915,272.756372,12800000111,ok,7000,46.928327,147.458912,69.2,0,0,15173292032,1127925,247,1.164653,0.582327
2,40000000,300,3,no,yes,247.267141,250.397107,12800000111,ok,7000,51.118802,153.231291,78.33,0,0,15173292032,912218,183,1.06918,0.356393
3,40000000,300,4,no,yes,250.383048,253.552456,12800000111,ok,7000,50.482651,166.096666,83.85,0,0,15173292032,666618,153,1.082654,0.270663
4,40000000,300,6,no,yes,233.666874,236.624685,12800000111,ok,7000,54.094103,158.982209,86.0,0,0,15173292032,385966,96,1.010373,0.168396
5,40000000,300,8,no,yes,237.414241,240.419486,12800000111,ok,7000,53.240277,159.428171,84.88,0,0,15173292032,231818,56,1.026577,0.128322
6,40000000,300,10,no,yes,233.273001,236.225826,12800000111,ok,7000,54.185439,160.375188,86.9,0,0,15173816320,164729,62,1.00867,0.100867


In [13]:
# Compression ON, test_configs[2] (num_keys=40.0, value_size=500):
# one row per thread count, including the added speedup/efficiency columns.
dfs_compression_on[2]

Unnamed: 0,keys,value_size,num_threads,rdbcompression,rdbchecksum,valkey_data_throughput_mb_s,actual_throughput_mb_s,rdb_file_size_bytes,status,port,save_duration_seconds,cpu_utilization_percent,cpu_total_time_seconds,io_read_bytes,io_write_bytes,memory_rss_bytes,context_switches_voluntary,context_switches_involuntary,speedup,efficiency
0,40000000,500,1,yes,yes,172.94567,174.282936,20799594423,ok,7000,119.343838,99.954889,119.29,0,0,23428284416,1,1838,1.0,1.0
1,40000000,500,2,yes,yes,180.760936,182.158632,20799594423,ok,7000,114.183963,154.688974,176.63,0,0,23428808704,2317708,492,1.045189,0.522595
2,40000000,500,3,yes,yes,167.621864,168.917965,20799594423,ok,7000,123.134295,157.072407,193.41,0,0,23429332992,1629921,323,0.969217,0.323072
3,40000000,500,4,yes,yes,169.608223,170.919683,20799594423,ok,7000,121.692213,160.979898,195.9,0,0,23430381568,1204519,188,0.980702,0.245176
4,40000000,500,6,yes,yes,157.531329,158.749406,20799594423,ok,7000,131.021557,162.5763,213.01,0,0,23431954432,773470,164,0.910872,0.151812
5,40000000,500,8,yes,yes,131.091316,132.104951,20799594423,ok,7000,157.447501,148.220835,233.37,0,0,23433527296,590591,143,0.757991,0.094749
6,40000000,500,10,yes,yes,128.728238,129.723602,20799594423,ok,7000,160.33778,166.34258,266.71,0,0,23435100160,467480,134,0.744328,0.074433


In [14]:
# Compression OFF, test_configs[2] (num_keys=40.0, value_size=500):
# one row per thread count, including the added speedup/efficiency columns.
dfs_compression_off[2]

Unnamed: 0,keys,value_size,num_threads,rdbcompression,rdbchecksum,valkey_data_throughput_mb_s,actual_throughput_mb_s,rdb_file_size_bytes,status,port,save_duration_seconds,cpu_utilization_percent,cpu_total_time_seconds,io_read_bytes,io_write_bytes,memory_rss_bytes,context_switches_voluntary,context_switches_involuntary,speedup,efficiency
0,40000000,500,1,no,yes,331.91653,334.489528,20800000111,ok,7000,62.184309,99.960909,62.16,0,0,23428902912,1,994,1.0,1.0
1,40000000,500,2,no,yes,275.323665,277.457958,20800000111,ok,7000,74.966313,145.465337,109.05,0,0,23428902912,2277322,315,0.829497,0.414748
2,40000000,500,3,no,yes,259.419334,261.430338,20800000111,ok,7000,79.562304,160.012962,127.31,0,0,23429427200,1676561,144,0.78158,0.260527
3,40000000,500,4,no,yes,256.422866,258.410641,20800000111,ok,7000,80.492042,167.209077,134.59,0,0,23429427200,1206540,172,0.772552,0.193138
4,40000000,500,6,no,yes,238.580866,240.430331,20800000111,ok,7000,86.511548,165.330529,143.03,0,0,23429951488,686863,124,0.718798,0.1198
5,40000000,500,8,no,yes,254.33795,256.309563,20800000111,ok,7000,81.151869,156.361155,126.89,0,0,23429951488,360279,94,0.766271,0.095784
6,40000000,500,10,no,yes,253.600688,255.566586,20800000111,ok,7000,81.387792,162.469084,132.23,0,0,23429951488,269805,81,0.76405,0.076405


In [15]:
# Compression ON, test_configs[3] (num_keys=5.0, value_size=5000):
# one row per thread count, including the added speedup/efficiency columns.
dfs_compression_on[3]

Unnamed: 0,keys,value_size,num_threads,rdbcompression,rdbchecksum,valkey_data_throughput_mb_s,actual_throughput_mb_s,rdb_file_size_bytes,status,port,save_duration_seconds,cpu_utilization_percent,cpu_total_time_seconds,io_read_bytes,io_write_bytes,memory_rss_bytes,context_switches_voluntary,context_switches_involuntary,speedup,efficiency
0,5000000,5000,1,yes,yes,147.3934,132.587385,22560654776,ok,7000,170.156873,99.954823,170.08,0,0,26105483264,1,2687,1.0,1.0
1,5000000,5000,2,yes,yes,268.408759,241.446466,22560654776,ok,7000,93.439573,193.258588,180.58,0,0,26105483264,256046,708,1.821036,0.910518
2,5000000,5000,3,yes,yes,265.219722,238.577775,22560654776,ok,7000,94.563103,244.291898,231.01,0,0,26105483264,1261061,419,1.7994,0.5998
3,5000000,5000,4,yes,yes,260.220358,234.080608,22560654776,ok,7000,96.379854,251.629351,242.52,0,0,26105483264,1106530,273,1.765482,0.44137
4,5000000,5000,6,yes,yes,226.683448,203.91256,22560654776,ok,7000,110.638868,234.953597,259.95,0,0,26107056128,721072,217,1.537948,0.256325
5,5000000,5000,8,yes,yes,152.763954,137.418454,22560654776,ok,7000,164.174855,173.723315,285.21,0,0,26107580416,535202,183,1.036437,0.129555
6,5000000,5000,10,yes,yes,142.14832,127.869185,22560654776,ok,7000,176.43543,165.159571,291.4,0,0,26107580416,432433,148,0.964414,0.096441


In [16]:
# Compression OFF, test_configs[3] (num_keys=5.0, value_size=5000):
# one row per thread count, including the added speedup/efficiency columns.
dfs_compression_off[3]

Unnamed: 0,keys,value_size,num_threads,rdbcompression,rdbchecksum,valkey_data_throughput_mb_s,actual_throughput_mb_s,rdb_file_size_bytes,status,port,save_duration_seconds,cpu_utilization_percent,cpu_total_time_seconds,io_read_bytes,io_write_bytes,memory_rss_bytes,context_switches_voluntary,context_switches_involuntary,speedup,efficiency
0,5000000,5000,1,no,yes,444.276926,444.631216,25100000111,ok,7000,56.451277,99.962308,56.43,0,0,26106732544,1,825,1.0,1.0
1,5000000,5000,2,no,yes,347.700104,347.977378,25100000111,ok,7000,72.131126,140.161406,101.1,0,0,26106732544,1972960,206,0.78262,0.39131
2,5000000,5000,3,no,yes,337.089078,337.35789,25100000111,ok,7000,74.401699,158.665732,118.05,0,0,26106732544,1504659,124,0.758736,0.252912
3,5000000,5000,4,no,yes,327.39788,327.658964,25100000111,ok,7000,76.604039,157.524331,120.67,0,0,26106732544,1076106,106,0.736923,0.184231
4,5000000,5000,6,no,yes,314.847746,315.098822,25100000111,ok,7000,79.65755,165.772109,132.05,0,0,26106732544,535307,143,0.708675,0.118112
5,5000000,5000,8,no,yes,323.889089,324.147375,25100000111,ok,7000,77.433914,150.541272,116.57,0,0,26106732544,400156,81,0.729025,0.091128
6,5000000,5000,10,no,yes,329.989144,330.252295,25100000111,ok,7000,76.0025,143.968948,109.42,0,0,26106732544,263889,74,0.742756,0.074276


In [17]:
# Compression ON, test_configs[4] (num_keys=1.0, value_size=20000):
# one row per thread count, including the added speedup/efficiency columns.
dfs_compression_on[4]

Unnamed: 0,keys,value_size,num_threads,rdbcompression,rdbchecksum,valkey_data_throughput_mb_s,actual_throughput_mb_s,rdb_file_size_bytes,status,port,save_duration_seconds,cpu_utilization_percent,cpu_total_time_seconds,io_read_bytes,io_write_bytes,memory_rss_bytes,context_switches_voluntary,context_switches_involuntary,speedup,efficiency
0,1000000,20000,1,yes,yes,205.622633,163.783097,15943198583,ok,7000,97.34337,99.955447,97.3,0,0,20710211584,1,1412,1.0,1.0
1,1000000,20000,2,yes,yes,405.626204,323.090484,15943198583,ok,7000,49.345924,198.901938,98.15,0,0,20710211584,7322,504,1.972673,0.986336
2,1000000,20000,3,yes,yes,555.42133,442.405704,15943198583,ok,7000,36.037507,287.256276,103.52,0,0,20711260160,89939,173,2.701168,0.900389
3,1000000,20000,4,yes,yes,519.12437,413.49435,15943198583,ok,7000,38.557234,312.652092,120.55,0,0,20712833024,230780,133,2.524646,0.631162
4,1000000,20000,6,yes,yes,473.992685,377.545939,15943198583,ok,7000,42.2285,296.766404,125.32,0,0,20714930176,165016,82,2.305158,0.384193
5,1000000,20000,8,yes,yes,387.789696,308.8833,15943198583,ok,7000,51.615606,258.60396,133.48,0,0,20716503040,124236,77,1.885929,0.235741
6,1000000,20000,10,yes,yes,386.334702,307.724364,15943198583,ok,7000,51.809998,260.992098,135.22,0,0,20718600192,98471,69,1.878853,0.187885


In [18]:
# Compression OFF, test_configs[4] (num_keys=1.0, value_size=20000):
# one row per thread count, including the added speedup/efficiency columns.
dfs_compression_off[4]

Unnamed: 0,keys,value_size,num_threads,rdbcompression,rdbchecksum,valkey_data_throughput_mb_s,actual_throughput_mb_s,rdb_file_size_bytes,status,port,save_duration_seconds,cpu_utilization_percent,cpu_total_time_seconds,io_read_bytes,io_write_bytes,memory_rss_bytes,context_switches_voluntary,context_switches_involuntary,speedup,efficiency
0,1000000,20000,1,no,yes,781.544453,781.817779,20023000111,ok,7000,25.610827,99.957724,25.6,0,0,20713738240,1,363,1.0,1.0
1,1000000,20000,2,no,yes,658.675878,658.906234,20023000111,ok,7000,30.388239,126.759566,38.52,0,0,20713738240,478700,141,0.842787,0.421394
2,1000000,20000,3,no,yes,659.491038,659.721679,20023000111,ok,7000,30.350678,129.189866,39.21,0,0,20714262528,331258,130,0.84383,0.281277
3,1000000,20000,4,no,yes,637.09295,637.315758,20023000111,ok,7000,31.417708,131.422701,41.29,0,0,20714262528,243343,86,0.815172,0.203793
4,1000000,20000,6,no,yes,643.974144,644.199358,20023000111,ok,7000,31.081993,142.783636,44.38,0,0,20714262528,166631,46,0.823976,0.137329
5,1000000,20000,8,no,yes,617.148297,617.36413,20023000111,ok,7000,32.433047,169.025128,54.82,0,0,20714262528,124939,55,0.789652,0.098707
6,1000000,20000,10,no,yes,615.35282,615.568025,20023000111,ok,7000,32.527681,174.436046,56.74,0,0,20714262528,84691,40,0.787355,0.078735
