In [10]:
# Performance Analysis of Different Tree Implementations

# This notebook analyzes and visualizes the performance characteristics of different tree implementations:
# - BTreeOLC
# - ARTOLC
# - OptiQL
# - MassTree
# - FBTree

# We'll analyze:
# 1. Throughput vs Thread Count
# 2. Average Latency vs Thread Count
# 3. Read Latency vs Thread Count
# 4. Write Latency vs Thread Count
# 5. Latency vs Throughput


In [19]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import glob
import os

# Plot styling is left at the matplotlib/seaborn defaults.

# Make sure the output tree exists before any figure is written:
# the shared root first, then one sub-directory per YCSB workload.
figure_dirs = ['results/figures'] + [
    f'results/figures/{workload}'
    for workload in ['YCSB-A', 'YCSB-B', 'YCSB-C']
]
for figure_dir in figure_dirs:
    os.makedirs(figure_dir, exist_ok=True)

# Function to load data for a system and workload
def load_system_data(system_name, workload_file):
    """Load one system's benchmark results for one workload.

    Reads ``results/<system_name lower-cased>_results/<workload_file>`` as a
    CSV file and tags every row with the system it came from.

    Args:
        system_name: Display name of the system (e.g. ``'BTreeOLC'``). Its
            lower-cased form selects the results directory; the original
            spelling is written to the ``System`` column.
        workload_file: Name of the CSV file inside that directory
            (e.g. ``'a.csv'``).

    Returns:
        A DataFrame holding the CSV contents plus a ``System`` column, or
        ``None`` when the file does not exist.
    """
    path = f'results/{system_name.lower()}_results/{workload_file}'
    if not os.path.exists(path):
        # A missing result file is not an error here; callers skip ``None``.
        return None

    df = pd.read_csv(path)
    df['System'] = system_name
    # Deliberately no print(df): dumping whole frames floods the cell output.
    return df

# Dictionary to map file names to YCSB workload names
workload_files = {
    'a.csv': 'YCSB-A',
    'b.csv': 'YCSB-B',
    'c.csv': 'YCSB-C'
}

# Systems to compare; each name is also the value stored in the 'System' column.
systems = ['BTreeOLC', 'ARTOLC', 'OptiQL', 'MassTree', 'FBTree']

# Workload name -> one DataFrame with the rows of every system that had results.
data_by_workload = {}

for workload_file, workload_name in workload_files.items():
    dfs = []
    missing_systems = []
    for system in systems:
        df = load_system_data(system, workload_file)
        if df is None:
            missing_systems.append(system)
        else:
            dfs.append(df)

    # Report gaps instead of dropping them silently, so a plot that lacks a
    # system can be traced back to a missing (or misnamed) result file.
    if missing_systems:
        print(f"\nNo {workload_file} results found for: {', '.join(missing_systems)}")

    if not dfs:
        print(f"Skipping {workload_name}: no result files found")
        continue

    data_by_workload[workload_name] = pd.concat(dfs, ignore_index=True)
    print(f"\nData loaded for {workload_name}")
    print(f"Systems found: {data_by_workload[workload_name]['System'].unique()}")


In [15]:
# Common plotting function
def create_plot(x, y, data, title, xlabel, ylabel, workload, plot_type, figsize=(12, 6)):
    """Draw one line per system and save the figure as a PNG.

    The figure is written to ``results/figures/<workload>/<plot_type>.png``
    and closed afterwards; nothing is returned or shown inline.

    Args:
        x: Column of ``data`` used for the x axis.
        y: Column of ``data`` used for the y axis.
        data: DataFrame containing ``x``, ``y`` and a ``System`` column
            (used as the hue, i.e. one line per system).
        title: Plot title; the workload name is appended to it.
        xlabel: Label for the x axis.
        ylabel: Label for the y axis.
        workload: Workload name; used in the title and as the output
            sub-directory.
        plot_type: Base name of the output file, without extension.
        figsize: Figure size in inches, ``(width, height)``.
    """
    filename = f'results/figures/{workload}/{plot_type}.png'
    # Create the target directory on demand so workloads other than the
    # pre-created ones can be plotted too.
    os.makedirs(os.path.dirname(filename), exist_ok=True)

    fig, ax = plt.subplots(figsize=figsize)
    try:
        sns.lineplot(data=data, x=x, y=y, hue='System', marker='o', ax=ax)
        ax.set_title(f'{title} - {workload}', fontsize=14)
        ax.set_xlabel(xlabel, fontsize=12)
        ax.set_ylabel(ylabel, fontsize=12)
        ax.grid(True, linestyle='--', alpha=0.7)
        # Legend sits outside the axes so it never covers the lines.
        ax.legend(title='System', bbox_to_anchor=(1.05, 1), loc='upper left')
        fig.tight_layout()

        # bbox_inches='tight' keeps the outside legend inside the saved image.
        fig.savefig(filename, bbox_inches='tight', dpi=300)
    finally:
        # Always release the figure, even if plotting or saving failed.
        plt.close(fig)

# 1. Throughput vs Thread Count, one figure per workload
for workload_label, workload_df in data_by_workload.items():
    create_plot(
        x='Thread Count',
        y='Throughput (ops/s)',
        data=workload_df,
        title='Throughput vs Thread Count',
        xlabel='Number of Threads',
        ylabel='Throughput (ops/s)',
        workload=workload_label,
        plot_type='throughput_vs_threads',
    )

In [13]:
# Latency-vs-threads figures, one set per workload.
# Each entry: (y column, title, y-axis label, output file stem).
thread_latency_plots = [
    # 2. Average Latency vs Thread Count
    ('Avg Latency (ns)', 'Average Latency vs Thread Count',
     'Average Latency (ns)', 'avg_latency_vs_threads'),
    # 3. Read Latency vs Thread Count
    ('Avg Read Lat (ns)', 'Read Latency vs Thread Count',
     'Read Latency (ns)', 'read_latency_vs_threads'),
]

for workload_label, workload_df in data_by_workload.items():
    for y_column, plot_title, y_label, file_stem in thread_latency_plots:
        create_plot(
            'Thread Count',
            y_column,
            workload_df,
            plot_title,
            'Number of Threads',
            y_label,
            workload_label,
            file_stem,
        )


In [14]:
# Final two figures per workload.
# Each entry: (x column, y column, title, x-axis label, y-axis label, file stem).
final_plots = [
    # 4. Write Latency vs Thread Count
    ('Thread Count', 'Avg Write Lat (ns)', 'Write Latency vs Thread Count',
     'Number of Threads', 'Write Latency (ns)', 'write_latency_vs_threads'),
    # 5. Latency vs Throughput
    # NOTE(review): seaborn's lineplot sorts by x by default, so these points
    # are joined in throughput order, not thread-count order - confirm intended.
    ('Throughput (ops/s)', 'Avg Latency (ns)', 'Latency vs Throughput',
     'Throughput (ops/s)', 'Average Latency (ns)', 'latency_vs_throughput'),
]

for workload_label, workload_df in data_by_workload.items():
    for x_column, y_column, plot_title, x_label, y_label, file_stem in final_plots:
        create_plot(
            x_column,
            y_column,
            workload_df,
            plot_title,
            x_label,
            y_label,
            workload_label,
            file_stem,
        )
