In [1]:
import pandas as pd

In [2]:
def calculate_speedup_and_efficiency(df: pd.DataFrame, num_threads_column: str = 'num_threads', duration_column: str = 'save_duration_seconds') -> pd.DataFrame:
    """
    Calculates speedup and efficiency for a DataFrame of benchmark results.

    The first row whose thread count equals 1 is used as the baseline:

        speedup    = baseline duration / row duration
        efficiency = speedup / row thread count

    The input DataFrame is never modified; the metrics are added to a copy.

    Args:
        df (pd.DataFrame): The input DataFrame containing benchmark results.
                          Must have columns for thread count and duration.
        num_threads_column (str): The name of the column containing the number of threads.
        duration_column (str): The name of the column containing the duration (runtime) in seconds.

    Returns:
        pd.DataFrame: A copy of ``df`` with 'speedup' and 'efficiency' columns added,
                      or an empty DataFrame (after printing a message) if the input
                      is empty, has no single-thread baseline row, or the baseline
                      duration is zero.

    Raises:
        KeyError: If ``num_threads_column`` or ``duration_column`` is not a column of ``df``.
    """
    if df.empty:
        print("Warning: Input DataFrame is empty.")
        return pd.DataFrame()

    # Get the baseline runtime (1 thread)
    baseline_rows = df[df[num_threads_column] == 1]
    if baseline_rows.empty:
        print(f"Error: Baseline (1 {num_threads_column}) not found in the DataFrame. Cannot calculate speedup.")
        return pd.DataFrame()

    # If several single-thread rows exist, the first one is the baseline.
    baseline_runtime = baseline_rows[duration_column].iloc[0]

    if baseline_runtime == 0:
        print("Warning: Baseline runtime is zero. Speedup and efficiency calculations will result in division by zero.")
        return pd.DataFrame()

    # Work on a copy so the caller's frame is left untouched (and so that a
    # sliced input does not trigger pandas' SettingWithCopyWarning).
    result = df.copy()

    # Speedup = Runtime (1 thread) / Runtime (P threads)
    result['speedup'] = baseline_runtime / result[duration_column]

    # Efficiency = Speedup / Number of threads
    result['efficiency'] = result['speedup'] / result[num_threads_column]

    return result

In [3]:
def extract_efficiency_and_speedup_at_threads(dfs: list[pd.DataFrame], threads = 2):
    """
    Collects, per workload, the efficiency and speedup measured at one thread count.

    For every DataFrame in ``dfs`` the first row whose 'num_threads' equals
    ``threads`` is read. DataFrames that lack a required column or have no row
    for that thread count are skipped with a printed warning instead of
    raising.

    Args:
        dfs (list[pd.DataFrame]): DataFrames with 'num_threads', 'value_size',
                                  'speedup' and 'efficiency' columns.
        threads: The thread count to extract the metrics for.

    Returns:
        pd.DataFrame: One row per usable input DataFrame, sorted by 'value_size',
                      with columns 'value_size', 'efficiency_at_{threads}_threads'
                      and 'speedup_at_{threads}_threads'. The columns are present
                      even when no row could be extracted.
    """
    efficiency_col = f'efficiency_at_{threads}_threads'
    speedup_col = f'speedup_at_{threads}_threads'
    required_cols = ('num_threads', 'value_size', 'speedup', 'efficiency')

    extracted_data = []
    for position, df in enumerate(dfs):
        missing_cols = [col for col in required_cols if col not in df.columns]
        if missing_cols:
            print(f"Warning: DataFrame at index {position} is missing columns {missing_cols}; skipping.")
            continue

        df_at_threads = df[df['num_threads'] == threads]
        if df_at_threads.empty:
            print(f"Warning: DataFrame at index {position} has no row for {threads} threads; skipping.")
            continue

        # Read column-wise so each value keeps its column's dtype.
        extracted_data.append({
            'value_size': df_at_threads['value_size'].iloc[0],
            efficiency_col: df_at_threads['efficiency'].iloc[0],
            speedup_col: df_at_threads['speedup'].iloc[0],
        })

    # Naming the columns explicitly keeps the sort below valid for an empty result.
    result_df = pd.DataFrame(extracted_data, columns=['value_size', efficiency_col, speedup_col])
    return result_df.sort_values(by='value_size').reset_index(drop=True)

In [4]:
import pandas as pd
import plotly.graph_objects as go
from plotly.subplots import make_subplots

def plot_workload_performance(df_metrics: pd.DataFrame, threads: int = 2):
    """
    Show speedup and efficiency against value size for one thread count.

    Builds a two-panel interactive Plotly figure (speedup on the left,
    efficiency on the right, both over a log-scaled value-size axis) and
    displays it with ``fig.show()``. Nothing is returned. If ``df_metrics`` is
    empty or lacks a required column, an error is printed and no figure is
    produced.

    Args:
        df_metrics (pd.DataFrame): Must contain 'value_size',
                                   'efficiency_at_{threads}_threads' and
                                   'speedup_at_{threads}_threads' columns, e.g.
                                   the output of
                                   extract_efficiency_and_speedup_at_threads.
        threads (int): Thread count used to select the metric columns and to
                       label the figure.
    """
    if df_metrics.empty:
        print("Error: Input DataFrame is empty. Cannot generate plot.")
        return

    required_efficiency_col = f'efficiency_at_{threads}_threads'
    required_speedup_col = f'speedup_at_{threads}_threads'
    required_cols = ['value_size', required_efficiency_col, required_speedup_col]
    if any(col not in df_metrics.columns for col in required_cols):
        print(f"Error: DataFrame must contain '{required_cols}' columns for plotting.")
        return

    sizes = df_metrics['value_size']
    point_count = len(df_metrics)

    def measured(column, label, symbol):
        # Line-and-marker trace for a measured metric column.
        return go.Scatter(
            x=sizes,
            y=df_metrics[column],
            mode='lines+markers',
            name=label,
            marker=dict(symbol=symbol, size=8),
            line=dict(width=2),
        )

    def reference(level, label, dash, color):
        # Horizontal guide line drawn at a constant y value.
        return go.Scatter(
            x=sizes,
            y=[level] * point_count,
            mode='lines',
            name=label,
            line=dict(dash=dash, color=color),
            showlegend=True,
        )

    fig = make_subplots(
        rows=1, cols=2,
        subplot_titles=(f'Speedup at {threads} Threads vs. Value Size',
                        f'Efficiency at {threads} Threads vs. Value Size'),
    )

    # Traces are added in a fixed order: it determines legend order and the
    # default colours of the measured lines.
    panels = (
        (1, (measured(required_speedup_col, f'Speedup at {threads} Threads', 'circle'),
             reference(1, 'Ideal (1x Speedup)', 'dash', 'gray'))),
        (2, (measured(required_efficiency_col, f'Efficiency at {threads} Threads', 'diamond'),
             reference(1, 'Ideal Efficiency (100%)', 'dash', 'gray'),
             reference(0.5, '50% Efficiency', 'dot', 'orange'))),
    )
    for panel_col, traces in panels:
        for trace in traces:
            fig.add_trace(trace, row=1, col=panel_col)

    fig.update_layout(
        title_text=f'Performance Scaling with Workload (at {threads} Threads)',
        height=600, width=1200,
        showlegend=True,
        hovermode="x unified",    # one hover box listing every trace at that x
        template="plotly_white",
        title_x=0.5,              # centre the main title
    )

    # Both panels share the same log-scaled x axis, ticked at the measured sizes.
    tick_values = sizes.tolist()
    tick_labels = [f'{s}B' for s in tick_values]
    for panel_col in (1, 2):
        fig.update_xaxes(
            title_text='Value Size (Bytes)',
            type='log',
            tickmode='array',
            tickvals=tick_values,
            ticktext=tick_labels,
            row=1, col=panel_col,
        )

    # Fixed y ranges keep separate figures visually comparable; the efficiency
    # range extends past 1.0 to leave room for superlinear results.
    y_axes = (
        (1, 'Speedup (X times faster)', [0.0, 5.0]),
        (2, 'Efficiency (Fraction of Ideal)', [0.0, 1.5]),
    )
    for panel_col, axis_title, axis_range in y_axes:
        fig.update_yaxes(title_text=axis_title, range=axis_range, row=1, col=panel_col)

    fig.show()
    # To keep an interactive copy on disk:
    # fig.write_html(f"performance_scaling_at_{threads}_threads.html")

In [5]:
import pandas as pd
import plotly.graph_objects as go
from plotly.subplots import make_subplots

def plot_single_df_performance(df: pd.DataFrame, workload_description: str = "Performance Metrics"):
    """
    Show speedup and efficiency against thread count for a single workload.

    Builds a two-panel interactive Plotly figure (speedup on the left,
    efficiency on the right) with fixed y-axis ranges and displays it with
    ``fig.show()``. Nothing is returned. If ``df`` is empty or lacks a required
    column, an error is printed and no figure is produced.

    Args:
        df (pd.DataFrame): Must contain 'num_threads', 'speedup' and
                           'efficiency' columns.
        workload_description (str): Text used at the start of the figure title,
                                    e.g. "Value Size 50 Bytes, No Compression".
    """
    needed_columns = ('num_threads', 'speedup', 'efficiency')
    if df.empty or any(name not in df.columns for name in needed_columns):
        print("Error: DataFrame must contain 'num_threads', 'speedup', and 'efficiency' columns for plotting.")
        return

    thread_counts = df['num_threads']
    row_count = len(df)

    def measured(column, label, symbol, color):
        # Line-and-marker trace for a measured metric column.
        return go.Scatter(
            x=thread_counts,
            y=df[column],
            mode='lines+markers',
            name=label,
            marker=dict(symbol=symbol, size=8),
            line=dict(width=2, color=color),
        )

    def reference(level, label, dash, color):
        # Horizontal guide line drawn at a constant y value.
        return go.Scatter(
            x=thread_counts,
            y=[level] * row_count,
            mode='lines',
            name=label,
            line=dict(dash=dash, color=color),
            showlegend=True,
        )

    fig = make_subplots(rows=1, cols=2, subplot_titles=('Speedup', 'Efficiency'))

    # Trace order is preserved because it determines the legend order.
    panels = (
        (1, (measured('speedup', 'Speedup', 'circle', 'royalblue'),
             reference(1, 'Ideal Speedup', 'dash', 'gray'))),
        (2, (measured('efficiency', 'Efficiency', 'diamond', 'firebrick'),
             reference(1, 'Ideal Efficiency (100%)', 'dash', 'gray'),
             reference(0.5, '50% Efficiency', 'dot', 'orange'))),
    )
    for panel_col, traces in panels:
        for trace in traces:
            fig.add_trace(trace, row=1, col=panel_col)

    fig.update_layout(
        title_text=f'{workload_description} vs. Number of Threads',
        height=600, width=1200,
        showlegend=True,
        hovermode="x unified",    # one hover box listing every trace at that x
        template="plotly_white",
        title_x=0.5,              # centre the main title
    )

    # Tick exactly at the thread counts that were measured.
    for panel_col in (1, 2):
        fig.update_xaxes(
            title_text='Number of Threads',
            tickmode='array',
            tickvals=thread_counts.unique(),
            row=1, col=panel_col,
        )

    # Fixed y ranges keep figures for different workloads visually comparable;
    # the efficiency range extends past 1.0 to leave room for superlinear results.
    y_axes = (
        (1, 'Speedup (X times faster)', [0.0, 5.0]),
        (2, 'Efficiency (Fraction of Ideal)', [0.0, 1.5]),
    )
    for panel_col, axis_title, axis_range in y_axes:
        fig.update_yaxes(title_text=axis_title, range=axis_range, row=1, col=panel_col)

    fig.show()
    # To keep an interactive copy on disk:
    # fig.write_html(f"performance_metrics_{workload_description.replace(' ', '_').replace('.', '')}.html")

In [6]:
import pandas as pd
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import plotly.express as px # Import plotly.express for its color sequences

def plot_combined_workload_performance(dfs: list[pd.DataFrame], workload_type_description: str = "Overall Performance"):
    """
    Show speedup and efficiency against value size, one line per thread count.

    All input DataFrames are concatenated, rows whose 'speedup' or 'efficiency'
    is not numeric are dropped, and the rows are ordered by 'value_size' so
    that each line runs left to right regardless of the order of ``dfs``. The
    figure is displayed with ``fig.show()``; nothing is returned. If ``dfs`` is
    empty or the combined data lacks a required column, an error is printed
    and no figure is produced.

    Args:
        dfs (list[pd.DataFrame]): DataFrames that each contain 'num_threads',
                                   'value_size', 'speedup' and 'efficiency'
                                   columns ('speedup' and 'efficiency' must
                                   already have been calculated).
        workload_type_description (str): Used verbatim as the figure title,
                                         e.g. "with Compression ON".
    """
    if not dfs:
        print("Error: Input list of DataFrames is empty. Cannot generate plot.")
        return

    # Combine all DataFrames into a single one for easier processing
    combined_df = pd.concat(dfs, ignore_index=True)

    required_cols = ['num_threads', 'value_size', 'speedup', 'efficiency']
    if not all(col in combined_df.columns for col in required_cols):
        print(f"Error: Combined DataFrame must contain '{required_cols}' columns for plotting.")
        return

    # Coerce the metrics to numbers, drop rows where that failed, and order by
    # value size. The stable sort keeps the input order among equal sizes.
    combined_df = (
        combined_df
        .assign(
            speedup=pd.to_numeric(combined_df['speedup'], errors='coerce'),
            efficiency=pd.to_numeric(combined_df['efficiency'], errors='coerce'),
        )
        .dropna(subset=['speedup', 'efficiency'])
        .sort_values('value_size', kind='stable')
    )

    # y ranges: at least [0, 5] / [0, 1.1], widened to 110% of the largest
    # observed value so no point is clipped.
    speedup_y_range = [0.0, max(5.0, combined_df['speedup'].max() * 1.1)]
    efficiency_y_range = [0.0, max(1.1, combined_df['efficiency'].max() * 1.1)]

    unique_threads = sorted(combined_df['num_threads'].unique())
    unique_value_sizes = sorted(combined_df['value_size'].unique())

    # One colour and one marker per thread count, cycling if there are more
    # thread counts than palette entries.
    colors = px.colors.qualitative.Plotly
    markers = ['circle', 'square', 'diamond', 'cross', 'x', 'triangle-up', 'triangle-down', 'pentagon', 'hexagram']
    color_map = {thread: colors[i % len(colors)] for i, thread in enumerate(unique_threads)}
    marker_map = {thread: markers[i % len(markers)] for i, thread in enumerate(unique_threads)}

    fig = make_subplots(rows=1, cols=2,
                        subplot_titles=('Speedup vs. Value Size',
                                        'Efficiency vs. Value Size'))

    def add_metric_traces(metric, panel_col, show_in_legend):
        # One line per thread count for the given metric column.
        for threads_count in unique_threads:
            df_subset = combined_df[combined_df['num_threads'] == threads_count]
            fig.add_trace(
                go.Scatter(
                    x=df_subset['value_size'],
                    y=df_subset[metric],
                    mode='lines+markers',
                    name=f'{threads_count} Threads',
                    legendgroup=str(threads_count),  # ties the two panels' lines together
                    showlegend=show_in_legend,
                    marker=dict(symbol=marker_map[threads_count], size=8),
                    line=dict(width=2, color=color_map[threads_count]),
                ),
                row=1, col=panel_col,
            )

    def add_reference_line(level, label, dash, color, group, panel_col):
        # Horizontal guide line drawn at a constant y value.
        fig.add_trace(
            go.Scatter(
                x=unique_value_sizes,
                y=[level] * len(unique_value_sizes),
                mode='lines',
                name=label,
                line=dict(dash=dash, color=color),
                showlegend=True,
                legendgroup=group,
            ),
            row=1, col=panel_col,
        )

    # Speedup panel; its lines carry the legend entries for each thread count.
    add_metric_traces('speedup', 1, True)
    add_reference_line(1, 'Ideal (1x Speedup)', 'dash', 'gray', 'ideal_speedup', 1)

    # Efficiency panel; legend entries are hidden to avoid duplicates.
    add_metric_traces('efficiency', 2, False)
    add_reference_line(1, 'Ideal Efficiency (100%)', 'dash', 'gray', 'ideal_efficiency_1', 2)
    add_reference_line(0.5, '50% Efficiency', 'dot', 'orange', 'ideal_efficiency_05', 2)

    fig.update_layout(
        title_text=f'{workload_type_description}',
        height=600, width=1200,
        showlegend=True,
        hovermode="x unified",
        template="plotly_white",
        title_x=0.5,
    )

    # Both panels share the same log-scaled x axis, ticked at the measured sizes.
    tick_labels = [f'{s}B' for s in unique_value_sizes]
    for panel_col in (1, 2):
        fig.update_xaxes(
            title_text='Value Size (Bytes)',
            type='log',
            tickmode='array',
            tickvals=unique_value_sizes,
            ticktext=tick_labels,
            row=1, col=panel_col,
        )

    fig.update_yaxes(title_text='Speedup (X times faster)', range=speedup_y_range, row=1, col=1)
    fig.update_yaxes(title_text='Efficiency (Fraction of Ideal)', range=efficiency_y_range, row=1, col=2)

    fig.show()
    # fig.write_html(f"combined_performance_scaling_{workload_type_description.replace(' ', '_').replace('.', '')}.html")

In [7]:
import pandas as pd
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import plotly.express as px # For distinct colors and markers

def plot_performance_by_workload_on_threads(dfs: list[pd.DataFrame], workload_names: list[str]):
    """
    Show speedup and efficiency against thread count, one line per workload.

    Builds a two-panel interactive Plotly figure (speedup on the left,
    efficiency on the right) and displays it with ``fig.show()``. Nothing is
    returned. An error is printed and no figure is produced when the input
    lists are empty or differ in length, or when any DataFrame lacks a
    required column (for example an empty DataFrame).

    Args:
        dfs (list[pd.DataFrame]): DataFrames that each contain 'num_threads',
                                   'speedup' and 'efficiency' columns
                                   ('speedup' and 'efficiency' must already
                                   have been calculated).
        workload_names (list[str]): Legend label for each DataFrame, in the same
                                     order (e.g., ["50 Bytes", "500 Bytes"]).
                                     Length must match dfs.
    """
    if not dfs or not workload_names or len(dfs) != len(workload_names):
        print("Error: Input lists 'dfs' and 'workload_names' must not be empty and must have matching lengths.")
        return

    # Validate every frame before drawing anything, so a bad input yields a
    # readable message instead of a KeyError.
    required_cols = ['num_threads', 'speedup', 'efficiency']
    for workload_label, df in zip(workload_names, dfs):
        missing_cols = [col for col in required_cols if col not in df.columns]
        if missing_cols:
            print(f"Error: DataFrame for workload '{workload_label}' is missing columns {missing_cols}. Cannot generate plot.")
            return

    # Fixed y-axis ranges so separate figures stay visually comparable.
    speedup_y_range = [0.0, 10.0]  # 0x to 10x speedup
    efficiency_y_range = [0.0, 2]  # 0% to 200% efficiency (room for superlinear results)

    # Every thread count seen in any frame; used for ticks and guide lines.
    all_num_threads = sorted(pd.concat(df['num_threads'] for df in dfs).unique())

    # One colour and one marker per workload, cycling when there are more
    # workloads than palette entries.
    colors = px.colors.qualitative.Plotly
    markers = ['circle', 'square', 'diamond', 'cross', 'x', 'triangle-up', 'triangle-down', 'pentagon', 'hexagram']

    fig = make_subplots(rows=1, cols=2,
                        subplot_titles=('Speedup vs. Number of Threads',
                                        'Efficiency vs. Number of Threads'))

    def add_workload_traces(metric, label_prefix, panel_col, show_in_legend):
        # One line per workload for the given metric column.
        for i, (df, workload_label) in enumerate(zip(dfs, workload_names)):
            fig.add_trace(
                go.Scatter(
                    x=df['num_threads'],
                    y=df[metric],
                    mode='lines+markers',
                    name=f'{label_prefix}: {workload_label}',
                    marker=dict(symbol=markers[i % len(markers)], size=8),
                    line=dict(width=2, color=colors[i % len(colors)]),
                    legendgroup=workload_label,  # ties the two panels' lines together
                    showlegend=show_in_legend,
                ),
                row=1, col=panel_col,
            )

    def add_reference_line(level, label, dash, color, group, panel_col):
        # Horizontal guide line drawn at a constant y value.
        fig.add_trace(
            go.Scatter(
                x=all_num_threads,
                y=[level] * len(all_num_threads),
                mode='lines',
                name=label,
                line=dict(dash=dash, color=color),
                showlegend=True,
                legendgroup=group,
            ),
            row=1, col=panel_col,
        )

    # Speedup panel; its lines carry the legend entries for each workload.
    add_workload_traces('speedup', 'Speedup', 1, True)
    add_reference_line(1, '(1x Speedup)', 'dash', 'gray', 'ideal_speedup', 1)

    # Efficiency panel; legend entries are hidden to avoid duplicates.
    add_workload_traces('efficiency', 'Efficiency', 2, False)
    add_reference_line(1, 'Ideal Efficiency (100%)', 'dash', 'gray', 'ideal_efficiency_1', 2)
    add_reference_line(0.5, '50% Efficiency', 'dot', 'orange', 'ideal_efficiency_05', 2)

    fig.update_layout(
        title_text='Performance Scaling Across Workloads',
        height=600, width=1200,
        showlegend=True,
        hovermode="x unified",
        template="plotly_white",
        title_x=0.5,
    )

    for panel_col in (1, 2):
        fig.update_xaxes(
            title_text='Number of Threads',
            tickmode='array',
            tickvals=all_num_threads,
            row=1, col=panel_col,
        )

    fig.update_yaxes(title_text='Speedup (X times faster)', range=speedup_y_range, row=1, col=1)
    fig.update_yaxes(title_text='Efficiency (Fraction of Ideal)', range=efficiency_y_range, row=1, col=2)

    fig.show()
    # To keep an interactive copy on disk:
    # fig.write_html(f"performance_scaling_across_workloads.html")

In [8]:
# NOTE(review): these three module-level values are not read by any code
# visible in this notebook (load_df binds its own num_keys / value_size locals
# and takes compression_on as a parameter) — confirm before relying on them.
num_keys = 50.0
value_size = 50
compression_on = True
# One entry per benchmark workload. load_df interpolates "num_keys" and
# "value_size" verbatim into the name of the summary CSV it reads, so the
# values must match the file names exactly (e.g. 40.0, not 40).
# num_keys is presumably in millions of keys (the displayed row for the 40.0
# config shows keys=40000000) and value_size in bytes — TODO confirm.
test_configs = [
    {"num_keys": 40.0, "value_size": 50},
    {"num_keys": 20.0, "value_size": 300},
    {"num_keys": 10.0, "value_size": 500},
    {"num_keys": 5.0, "value_size": 5000},
    {"num_keys": 1.0, "value_size": 20000},
    {"num_keys": 0.1, "value_size": 100000},

    ]

def load_df(test_config: dict, compression_on: bool):
    """
    Read the save-benchmark summary CSV that belongs to one workload configuration.

    The file is looked up relative to the current working directory under the name
    ``save_summary_<num_keys>keys_<value_size>B_comp-<yes|no>_csum-yes.csv``.
    The checksum part of the name is always ``csum-yes``.

    Args:
        test_config (dict): Must provide ``"num_keys"`` and ``"value_size"``; both are
                            formatted into the file name exactly as given
                            (e.g. ``40.0`` becomes ``"40.0keys"``).
        compression_on (bool): Selects the ``comp-yes`` file when truthy, ``comp-no`` otherwise.

    Returns:
        pd.DataFrame: The parsed CSV contents.
    """
    keys_part = test_config["num_keys"]
    size_part = test_config["value_size"]

    if compression_on:
        comp_flag = "yes"
    else:
        comp_flag = "no"

    csv_name = f"save_summary_{keys_part}keys_{size_part}B_comp-{comp_flag}_csum-yes.csv"
    return pd.read_csv(csv_name)


# Load every workload's summary CSV, then add the speedup/efficiency columns.
# Both lists follow the order of `test_configs`, so index i is the workload test_configs[i].
dfs_compression_on = [load_df(config, compression_on=True) for config in test_configs]
dfs_compression_on = [calculate_speedup_and_efficiency(df) for df in dfs_compression_on]
dfs_compression_off = [load_df(config, compression_on=False) for config in test_configs]
dfs_compression_off = [calculate_speedup_and_efficiency(df) for df in dfs_compression_off]

# Derive the plot labels from test_configs instead of keeping a second hand-written list
# that has to be edited in lockstep. The ',' format spec inserts thousands separators,
# which gives "Value Size 50B" ... "Value Size 100,000B" for the current configs.
all_workload_names = [f"Value Size {config['value_size']:,}B" for config in test_configs]


def _plot_compression_section(banner_title: str, workload_dfs: list) -> None:
    """Print a banner, then draw both scaling figures for one compression setting.

    Args:
        banner_title (str): Text printed between the two '=' border lines, used as given.
        workload_dfs (list): One DataFrame per entry of `all_workload_names`, in the same order.
    """
    print("\n" + "="*50) # Top border
    print(banner_title)
    print("="*50 + "\n") # Bottom border

    plot_performance_by_workload_on_threads(workload_dfs, all_workload_names)
    plot_combined_workload_performance(workload_dfs)


_plot_compression_section("                 COMPRESSION ON", dfs_compression_on)
_plot_compression_section("                 COMPRESSION OFF", dfs_compression_off)
        


                 COMPRESSION ON




                 COMPRESSION OFF



In [9]:
# Compression ON, workload test_configs[0] (displayed rows: keys=40000000, value_size=50).
# NOTE(review): efficiency is above 1.0 at 2-4 threads in this output, i.e. super-linear
# relative to the 1-thread run — confirm the single-thread baseline is representative.
dfs_compression_on[0]

Unnamed: 0,keys,value_size,num_threads,rdbcompression,rdbchecksum,valkey_data_throughput_mb_s,actual_throughput_mb_s,rdb_file_size_bytes,status,port,save_duration_seconds,cpu_utilization_percent,cpu_total_time_seconds,io_read_bytes,io_write_bytes,memory_rss_bytes,context_switches_voluntary,context_switches_involuntary,speedup,efficiency
0,40000000,50,1,yes,yes,124.846149,130.52098,2760000110,ok,7000,21.146027,99.92421,21.13,0,0,4525289472,1,312,1.0,1.0
1,40000000,50,2,yes,yes,333.139502,348.282221,2760000110,ok,7000,7.924608,197.107539,15.62,0,0,4526338048,439,78,2.6684,1.3342
2,40000000,50,3,yes,yes,397.628849,415.702904,2760000110,ok,7000,6.639357,292.950042,19.45,0,0,4526862336,1161,50,3.184951,1.06165
3,40000000,50,4,yes,yes,509.904054,533.081532,2760000110,ok,7000,5.177445,387.063532,20.04,0,0,4527910912,809,40,4.084259,1.021065
4,40000000,50,6,yes,yes,694.050739,725.598529,2760000110,ok,7000,3.803756,552.085815,21.0,0,0,4528824320,769,38,5.559248,0.926541
5,40000000,50,8,yes,yes,844.46744,882.852359,2760000110,ok,7000,3.126231,683.890677,21.38,0,0,4530757632,886,22,6.764065,0.845508
6,40000000,50,10,yes,yes,919.983756,961.801237,2760000110,ok,7000,2.869616,729.365909,20.93,0,0,4532109312,968,20,7.36894,0.736894


In [10]:
# Compression OFF, workload test_configs[0] (displayed rows: keys=40000000, value_size=50).
# In this output the speedup levels off at roughly 6x from 6 threads onward.
dfs_compression_off[0]

Unnamed: 0,keys,value_size,num_threads,rdbcompression,rdbchecksum,valkey_data_throughput_mb_s,actual_throughput_mb_s,rdb_file_size_bytes,status,port,save_duration_seconds,cpu_utilization_percent,cpu_total_time_seconds,io_read_bytes,io_write_bytes,memory_rss_bytes,context_switches_voluntary,context_switches_involuntary,speedup,efficiency
0,40000000,50,1,no,yes,159.005986,166.233537,2760000110,ok,7000,16.603149,99.920807,16.59,0,0,4523524096,1,257,1.0,1.0
1,40000000,50,2,no,yes,418.440587,437.460631,2760000110,ok,7000,6.309139,195.747775,12.35,0,0,4524048384,3223,44,2.631603,1.315801
2,40000000,50,3,no,yes,604.666998,632.151887,2760000110,ok,7000,4.36604,287.903946,12.57,0,0,4524572672,412,38,3.802794,1.267598
3,40000000,50,4,no,yes,750.221182,784.322176,2760000110,ok,7000,3.518962,369.142922,12.99,0,0,4524929024,906,35,4.718195,1.179549
4,40000000,50,6,no,yes,956.970146,1000.468829,2760000110,ok,7000,2.758707,479.572539,13.23,0,0,4525400064,1332,18,6.018454,1.003076
5,40000000,50,8,no,yes,982.669679,1027.336523,2760000110,ok,7000,2.686559,482.029255,12.95,0,0,4524138496,1220,6,6.18008,0.77251
6,40000000,50,10,no,yes,972.385379,1016.584755,2760000110,ok,7000,2.714973,457.831449,12.43,0,0,4523868160,993,12,6.115401,0.61154


In [11]:
# Compression ON, workload test_configs[1] (displayed rows: keys=20000000, value_size=300).
dfs_compression_on[1]

Unnamed: 0,keys,value_size,num_threads,rdbcompression,rdbchecksum,valkey_data_throughput_mb_s,actual_throughput_mb_s,rdb_file_size_bytes,status,port,save_duration_seconds,cpu_utilization_percent,cpu_total_time_seconds,io_read_bytes,io_write_bytes,memory_rss_bytes,context_switches_voluntary,context_switches_involuntary,speedup,efficiency
0,20000000,300,1,yes,yes,173.790299,175.990144,6399998837,ok,7000,36.365666,99.95692,36.35,0,0,7594377216,1,555,1.0,1.0
1,20000000,300,2,yes,yes,472.931326,478.917711,6399998837,ok,7000,13.363462,196.356297,26.24,0,0,7594901504,8417,39,2.721276,1.360638
2,20000000,300,3,yes,yes,699.31814,708.170139,6399998837,ok,7000,9.037375,291.566978,26.35,0,0,7595425792,6131,45,4.023919,1.341306
3,20000000,300,4,yes,yes,873.272807,884.326733,6399998837,ok,7000,7.237143,382.471381,27.68,0,0,7595950080,4096,28,5.024865,1.256216
4,20000000,300,6,yes,yes,1094.623557,1108.47935,6399998837,ok,7000,5.773674,462.270613,26.69,0,0,7596765184,3945,17,6.298531,1.049755
5,20000000,300,8,yes,yes,1063.44982,1076.911015,6399998837,ok,7000,5.942923,465.091029,27.64,0,0,7597604864,2724,19,6.119155,0.764894
6,20000000,300,10,yes,yes,1029.776506,1042.811462,6399998837,ok,7000,6.137254,463.236488,28.43,0,0,7598960640,2401,17,5.925397,0.59254


In [12]:
# Compression OFF, workload test_configs[1] (displayed rows: keys=20000000, value_size=300).
# In this output the speedup peaks at 2 threads and declines slightly as threads are added.
dfs_compression_off[1]

Unnamed: 0,keys,value_size,num_threads,rdbcompression,rdbchecksum,valkey_data_throughput_mb_s,actual_throughput_mb_s,rdb_file_size_bytes,status,port,save_duration_seconds,cpu_utilization_percent,cpu_total_time_seconds,io_read_bytes,io_write_bytes,memory_rss_bytes,context_switches_voluntary,context_switches_involuntary,speedup,efficiency
0,20000000,300,1,no,yes,301.932441,305.754376,6400000110,ok,7000,20.931835,99.94346,20.92,0,0,7596003328,1,313,1.0,1.0
1,20000000,300,2,no,yes,1172.998294,1187.846395,6400000110,ok,7000,5.387902,192.097031,10.35,0,0,7596527616,1729,73,3.884969,1.942485
2,20000000,300,3,no,yes,1132.130589,1146.461375,6400000110,ok,7000,5.582395,205.467371,11.47,0,0,7596527616,4793,34,3.749616,1.249872
3,20000000,300,4,no,yes,1118.965293,1133.12943,6400000110,ok,7000,5.648075,211.399456,11.94,0,0,7596523520,3661,32,3.706012,0.926503
4,20000000,300,6,no,yes,1094.622069,1108.478063,6400000110,ok,7000,5.773682,224.120405,12.94,0,0,7596511232,2298,29,3.625387,0.604231
5,20000000,300,8,no,yes,1061.166616,1074.599123,6400000110,ok,7000,5.955709,222.979314,13.28,0,0,7595851776,1818,23,3.514583,0.439323
6,20000000,300,10,no,yes,1030.066973,1043.105814,6400000110,ok,7000,6.135523,224.919687,13.8,0,0,7595773952,1537,31,3.411581,0.341158


In [13]:
# Compression ON, workload test_configs[2] (displayed rows: keys=10000000, value_size=500).
dfs_compression_on[2]

Unnamed: 0,keys,value_size,num_threads,rdbcompression,rdbchecksum,valkey_data_throughput_mb_s,actual_throughput_mb_s,rdb_file_size_bytes,status,port,save_duration_seconds,cpu_utilization_percent,cpu_total_time_seconds,io_read_bytes,io_write_bytes,memory_rss_bytes,context_switches_voluntary,context_switches_involuntary,speedup,efficiency
0,10000000,500,1,yes,yes,173.907927,175.252609,5199897879,ok,7000,29.670873,99.963355,29.66,0,0,5868138496,1,310,1.0,1.0
1,10000000,500,2,yes,yes,499.809528,503.674129,5199897879,ok,7000,10.323933,198.470876,20.49,0,0,5868134400,1772,85,2.873989,1.436995
2,10000000,500,3,yes,yes,717.992932,723.544559,5199897879,ok,7000,7.1867,294.850198,21.19,0,0,5868658688,2795,34,4.128581,1.376194
3,10000000,500,4,yes,yes,927.869471,935.043894,5199897879,ok,7000,5.561127,386.25264,21.48,0,0,5869002752,3026,34,5.335406,1.333852
4,10000000,500,6,yes,yes,1054.779546,1062.935256,5199897879,ok,7000,4.892018,455.844649,22.3,0,0,5869961216,3258,7,6.065161,1.01086
5,10000000,500,8,yes,yes,1044.800209,1052.878758,5199897879,ok,7000,4.938743,452.139315,22.33,0,0,5870936064,2487,22,6.007778,0.750972
6,10000000,500,10,yes,yes,1019.971154,1027.857721,5199897879,ok,7000,5.058967,452.661617,22.9,0,0,5871415296,2017,14,5.865007,0.586501


In [14]:
# Compression OFF, workload test_configs[2] (displayed rows: keys=10000000, value_size=500).
dfs_compression_off[2]

Unnamed: 0,keys,value_size,num_threads,rdbcompression,rdbchecksum,valkey_data_throughput_mb_s,actual_throughput_mb_s,rdb_file_size_bytes,status,port,save_duration_seconds,cpu_utilization_percent,cpu_total_time_seconds,io_read_bytes,io_write_bytes,memory_rss_bytes,context_switches_voluntary,context_switches_involuntary,speedup,efficiency
0,10000000,500,1,no,yes,325.588804,328.112755,5200000110,ok,7000,15.848211,99.948191,15.84,0,0,5869043712,1,213,1.0,1.0
1,10000000,500,2,no,yes,1167.408549,1176.458253,5200000110,ok,7000,4.420046,174.206315,7.7,0,0,5869043712,9647,25,3.58553,1.792765
2,10000000,500,3,no,yes,1101.527392,1110.066388,5200000110,ok,7000,4.684405,164.588686,7.71,0,0,5869568000,6582,16,3.383186,1.127729
3,10000000,500,4,no,yes,1038.910716,1046.96431,5200000110,ok,7000,4.966741,164.494197,8.17,0,0,5870092288,4978,5,3.190867,0.797717
4,10000000,500,6,no,yes,1031.422561,1039.418107,5200000110,ok,7000,5.002799,165.907117,8.3,0,0,5868896256,3228,8,3.167869,0.527978
5,10000000,500,8,no,yes,1018.98092,1026.880018,5200000110,ok,7000,5.063883,163.313415,8.27,0,0,5868867584,2427,4,3.129656,0.391207
6,10000000,500,10,no,yes,1003.4353,1011.213889,5200000110,ok,7000,5.142335,164.711182,8.47,0,0,5868761088,1913,5,3.08191,0.308191


In [15]:
# Compression ON, workload test_configs[3] (displayed rows: keys=5000000, value_size=5000).
dfs_compression_on[3]

Unnamed: 0,keys,value_size,num_threads,rdbcompression,rdbchecksum,valkey_data_throughput_mb_s,actual_throughput_mb_s,rdb_file_size_bytes,status,port,save_duration_seconds,cpu_utilization_percent,cpu_total_time_seconds,io_read_bytes,io_write_bytes,memory_rss_bytes,context_switches_voluntary,context_switches_involuntary,speedup,efficiency
0,5000000,5000,1,yes,yes,147.373613,132.569302,22560606510,ok,7000,170.179719,99.959032,170.11,0,0,26104893440,1,2500,1.0,1.0
1,5000000,5000,2,yes,yes,366.330326,329.530875,22560606510,ok,7000,68.4628,199.349134,136.48,0,0,26105405440,10333,341,2.485725,1.242863
2,5000000,5000,3,yes,yes,547.501571,492.502692,22560606510,ok,7000,45.808088,298.637221,136.8,0,0,26106454016,5765,223,3.715058,1.238353
3,5000000,5000,4,yes,yes,719.679777,647.384859,22560606510,ok,7000,34.848832,396.713434,138.25,0,0,26106966016,4187,218,4.883369,1.220842
4,5000000,5000,6,yes,yes,1059.323556,952.909964,22560606510,ok,7000,23.675486,586.34488,138.82,0,0,26107412480,4860,69,7.188014,1.198002
5,5000000,5000,8,yes,yes,1197.221029,1076.955046,22560606510,ok,7000,20.948513,671.789934,140.73,0,0,26107330560,10649,99,8.123714,1.015464
6,5000000,5000,10,yes,yes,1122.99145,1010.182146,22560606510,ok,7000,22.333207,646.705164,144.43,0,0,26107641856,8450,60,7.620031,0.762003


In [16]:
# Compression OFF, workload test_configs[3] (displayed rows: keys=5000000, value_size=5000).
dfs_compression_off[3]

Unnamed: 0,keys,value_size,num_threads,rdbcompression,rdbchecksum,valkey_data_throughput_mb_s,actual_throughput_mb_s,rdb_file_size_bytes,status,port,save_duration_seconds,cpu_utilization_percent,cpu_total_time_seconds,io_read_bytes,io_write_bytes,memory_rss_bytes,context_switches_voluntary,context_switches_involuntary,speedup,efficiency
0,5000000,5000,1,no,yes,444.886458,445.241234,25100000111,ok,7000,56.373934,99.957543,56.35,0,0,26105925632,1,803,1.0,1.0
1,5000000,5000,2,no,yes,1113.907387,1114.795675,25100000111,ok,7000,22.515337,113.389376,25.53,0,0,26106449920,47053,82,2.503802,1.251901
2,5000000,5000,3,no,yes,1072.453587,1073.308818,25100000111,ok,7000,23.385627,112.034625,26.2,0,0,26106974208,31431,52,2.410623,0.803541
3,5000000,5000,4,no,yes,1082.236223,1083.099255,25100000111,ok,7000,23.174238,113.703846,26.35,0,0,26107498496,23704,38,2.432612,0.608153
4,5000000,5000,6,no,yes,1064.130935,1064.979529,25100000111,ok,7000,23.568528,122.621148,28.9,0,0,26106429440,15895,37,2.391916,0.398653
5,5000000,5000,8,no,yes,1048.296127,1049.132094,25100000111,ok,7000,23.924537,115.864309,27.72,0,0,26105200640,12017,16,2.356323,0.29454
6,5000000,5000,10,no,yes,1002.582083,1003.381594,25100000111,ok,7000,25.015408,131.918695,33.0,0,0,26104643584,10068,16,2.253568,0.225357


In [17]:
# Compression ON, workload test_configs[4] (displayed rows: keys=1000000, value_size=20000).
dfs_compression_on[4]

Unnamed: 0,keys,value_size,num_threads,rdbcompression,rdbchecksum,valkey_data_throughput_mb_s,actual_throughput_mb_s,rdb_file_size_bytes,status,port,save_duration_seconds,cpu_utilization_percent,cpu_total_time_seconds,io_read_bytes,io_write_bytes,memory_rss_bytes,context_switches_voluntary,context_switches_involuntary,speedup,efficiency
0,1000000,20000,1,yes,yes,206.031621,164.109556,15943265698,ok,7000,97.150136,99.96898,97.12,0,0,20714127360,1,1279,1.0,1.0
1,1000000,20000,2,yes,yes,460.796299,367.036262,15943265698,ok,7000,43.437849,199.434369,86.63,0,0,20714651648,2701,342,2.236532,1.118266
2,1000000,20000,3,yes,yes,679.298072,541.078619,15943265698,ok,7000,29.46571,297.701973,87.72,0,0,20715700224,2975,200,3.297057,1.099019
3,1000000,20000,4,yes,yes,913.380801,727.531614,15943265698,ok,7000,21.91419,396.592353,86.91,0,0,20716707840,2915,89,4.433207,1.108302
4,1000000,20000,6,yes,yes,1319.672587,1051.153611,15943265698,ok,7000,15.167398,578.873167,87.8,0,0,20718047232,4868,67,6.405194,1.067532
5,1000000,20000,8,yes,yes,1314.611038,1047.121956,15943265698,ok,7000,15.225796,598.523701,91.13,0,0,20718436352,7485,54,6.380628,0.797578
6,1000000,20000,10,yes,yes,1252.047786,997.288695,15943265698,ok,7000,15.98661,581.111307,92.9,0,0,20719980544,5901,42,6.076969,0.607697


In [18]:
# Compression OFF, workload test_configs[4] (displayed rows: keys=1000000, value_size=20000).
# In this output the speedup stays near 1.4-1.5x for every thread count above 1.
dfs_compression_off[4]

Unnamed: 0,keys,value_size,num_threads,rdbcompression,rdbchecksum,valkey_data_throughput_mb_s,actual_throughput_mb_s,rdb_file_size_bytes,status,port,save_duration_seconds,cpu_utilization_percent,cpu_total_time_seconds,io_read_bytes,io_write_bytes,memory_rss_bytes,context_switches_voluntary,context_switches_involuntary,speedup,efficiency
0,1000000,20000,1,no,yes,750.586345,750.848844,20023000111,ok,7000,26.667152,99.973181,26.66,0,0,20710109184,1,308,1.0,1.0
1,1000000,20000,2,no,yes,1071.933082,1072.307965,20023000111,ok,7000,18.672807,110.856389,20.7,0,0,20711157760,35663,36,1.428128,0.714064
2,1000000,20000,3,no,yes,1102.075178,1102.460602,20023000111,ok,7000,18.1621,115.515274,20.98,0,0,20712730624,24308,31,1.468286,0.489429
3,1000000,20000,4,no,yes,1129.7975,1130.192619,20023000111,ok,7000,17.716449,127.903734,22.66,0,0,20713254912,19758,39,1.50522,0.376305
4,1000000,20000,6,no,yes,1106.522693,1106.909672,20023000111,ok,7000,18.0891,143.898809,26.03,0,0,20713041920,9347,43,1.474211,0.245702
5,1000000,20000,8,no,yes,1061.68256,1062.053858,20023000111,ok,7000,18.853093,159.072042,29.99,0,0,20712931328,9672,23,1.414471,0.176809
6,1000000,20000,10,no,yes,1055.108287,1055.477286,20023000111,ok,7000,18.970565,208.1646,39.49,0,0,20713332736,8933,29,1.405712,0.140571


In [19]:
# Load a separate benchmark summary from data.csv (path is relative to the working directory).
# NOTE(review): `data` is a generic name and the file name does not encode the workload the
# way the save_summary_* files do — consider a more descriptive name for both.
data = pd.read_csv("data.csv")

In [20]:
# Display the frame read from data.csv. It carries no speedup/efficiency columns because
# calculate_speedup_and_efficiency has not been applied to it.
data

Unnamed: 0,keys,value_size,num_threads,rdbcompression,rdbchecksum,valkey_data_throughput_mb_s,actual_throughput_mb_s,rdb_file_size_bytes,status,port,save_duration_seconds,cpu_utilization_percent,cpu_total_time_seconds,io_read_bytes,io_write_bytes,memory_rss_bytes,context_switches_voluntary,context_switches_involuntary
0,10000000,1000,1,yes,yes,225.52575,226.413649,10200000111,ok,7000,45.050288,99.932768,45.02,0,10200006656,11162349568,70,188
1,10000000,1000,2,yes,yes,501.55273,503.527352,10200000111,ok,7000,20.257092,181.911595,36.85,0,10200002560,11163070464,11082,68
2,10000000,1000,3,yes,yes,471.234828,473.090089,10200000111,ok,7000,21.560376,226.804952,48.9,0,10200006656,11163271168,49026,57
3,10000000,1000,4,yes,yes,499.166399,501.131626,10200000111,ok,7000,20.353934,217.55008,44.28,0,10200002560,11163324416,43099,38
4,10000000,1000,6,yes,yes,502.034092,504.010609,10200000111,ok,7000,20.237669,216.279844,43.77,0,10200006656,11163721728,32139,33
5,10000000,1000,8,yes,yes,500.83683,502.808634,10200000111,ok,7000,20.286048,227.545552,46.16,0,10200002560,11164069888,25131,19
6,10000000,1000,10,yes,yes,500.423098,502.393273,10200000111,ok,7000,20.30282,234.006903,47.51,0,10200006656,11164233728,20560,30
