In [1]:
import os

import matplotlib
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

In [2]:
# Select the PGF backend; the plotting cell below saves each figure as both
# .pdf and .pgf.
matplotlib.use("pgf")

# Global typography / LaTeX settings applied to every figure in this notebook.
matplotlib.rcParams['font.size'] = 8
matplotlib.rcParams['text.usetex'] = True
matplotlib.rcParams['text.latex.preamble'] = r'\usepackage{amsmath}'
matplotlib.rcParams['pgf.texsystem'] = "pdflatex"
matplotlib.rcParams['pgf.preamble'] = r'\usepackage{amsfonts}'
# Optional overrides, intentionally left disabled:
#   "font.family": "serif"
#   "pgf.rcfonts": False

In [3]:
# Raw benchmark results, one CSV per compiler (g++ first, then clang++).
th_gcc_df, th_clang_df = map(
    pd.read_csv,
    (
        "throughput_hash/throughput_hash-g++.csv",
        "throughput_hash/throughput_hash-clang++.csv",
    ),
)

In [4]:
# Figure dimensions handed to `plt.subplots(figsize=...)`, which matplotlib
# interprets as (width, height) in inches.
fig_width, fig_height = 6, 3

In [5]:
def hashfn_name(h):
    """Translate a raw hash-function identifier into its display label.

    Matching is by substring and the first rule that matches wins, so the
    rule order matters: "fibonacci" is tested before "mult", which makes an
    identifier containing both resolve to "Fibonacci".

    Returns "UNKNOWN" when no rule matches.
    """
    labels_by_substring = (
        ("fibonacci", "Fibonacci"),
        ("mult", "Multiplicative"),
        ("murmur", "Murmur3"),
        ("aqua", "AquaHash"),
        ("xxh3", "XXHash"),
        ("city", "CityHash"),
        ("tabulation", "Tabulation"),
        ("meow", "MeowHash"),
    )
    return next(
        (label for needle, label in labels_by_substring if needle in h),
        "UNKNOWN",
    )

def reducer_name(r):
    """Return the display label for the reducer identifier `r`.

    Raises:
        KeyError: if `r` is not one of the four known reducer identifiers.
    """
    display_labels = {
        "do_nothing": "No Reduction",
        "modulo": "Modulo",
        "fastrange64": "Fastrange",
        "fast_modulo": "Fast Modulo",
    }
    return display_labels[r]

def order(h):
    """Return the integer sort rank used to order hash functions in plots.

    The identifier is matched by substring and the first matching rule wins;
    "fibonacci" is tested before "mult" so that identifiers containing both
    get rank 0. The ranks follow the same rule order as `hashfn_name`.

    Unrecognised identifiers get a rank one past the last known rule, so they
    sort after every known hash. The result is always an int: a string
    sentinel here would make sorting a column that mixes known and unknown
    hashes raise TypeError.
    """
    substrings_by_rank = (
        "fibonacci",
        "mult",
        "murmur",
        "aqua",
        "xxh3",
        "city",
        "tabulation",
        "meow",
    )
    for rank, needle in enumerate(substrings_by_rank):
        if needle in h:
            return rank

    # Unknown hashes sort last instead of poisoning the key with a str.
    return len(substrings_by_rank)

In [6]:
# Subset of the benchmark results shown in the throughput figures.
PLOT_DATASET = "osm_cellids_200M_uint64"
PLOT_HASHES = [
    "mult_fibonacci_prime64",
    "mult_prime64",
    "murmur_finalizer64",
    "aqua0_64",
    "xxh3_128_low",
    "city64",
    "tabulation_1x255_64",
    "meow64_low",
]


def _prepare_throughput(df):
    """Return the rows of `df` to plot, with derived columns added.

    Keeps only rows for `PLOT_DATASET` whose hash is in `PLOT_HASHES`, then
    adds:
      * "throughput": keys per second, i.e. 1e9 / nanoseconds_per_key
      * "order": the plotting rank of the hash, from `order()`

    Rows are filtered before "order" is computed, so `order()` only ever sees
    the selected hash identifiers. A new frame is returned; `df` itself is
    not modified.
    """
    selected = df[(df["dataset"] == PLOT_DATASET) & df["hash"].isin(PLOT_HASHES)]
    return selected.assign(
        throughput=1e9 / selected["nanoseconds_per_key"],
        order=selected["hash"].map(order),
    )


# Same preparation for both compilers; the names are reused because the
# plotting cell below reads `th_gcc_df` / `th_clang_df`.
th_gcc_df = _prepare_throughput(th_gcc_df)
th_clang_df = _prepare_throughput(th_clang_df)

In [8]:
def plot_throughput(data, outname):
    """Draw a 2x2 grid of throughput bar charts, one panel per reducer.

    Args:
        data: DataFrame with at least the columns "reducer", "hash",
            "throughput" and "order".
        outname: file stem; the figure is written to plots/<outname>.pdf
            and plots/<outname>.pgf.

    Each panel holds one bar group per hash function, ordered left to right
    by the "order" column. The legend is drawn in the top-right panel only.
    """
    fig, axs = plt.subplots(2, 2, figsize=(fig_width, fig_height), sharex=True, sharey=True)

    # Panel order, row-major over the 2x2 grid. Iterating the reducers
    # directly pins every reducer to a fixed panel even when it has no rows.
    reducers = ("do_nothing", "modulo", "fastrange64", "fast_modulo")
    for panel, reducer in enumerate(reducers):
        ax = axs[panel // 2][panel % 2]

        ax.set_title(reducer_name(reducer))
        ax.set_ylabel("Keys per Second")
        ax.set_yticks(np.arange(0, 10**9+1, 0.25 * 10**9))
        # Bars are identified through the legend, so x ticks/labels are hidden.
        ax.tick_params(
            axis='x',
            which='both',
            top=False,
            bottom=False,
            labelbottom=False)
        ax.grid(linestyle="--", axis="y")

        subset = data[data["reducer"] == reducer].sort_values(by="order")
        # Group by a scalar key, not a one-element list: with a list key,
        # pandas >= 2.0 yields 1-tuples such as ("city64",), which
        # hashfn_name() would label "UNKNOWN".
        for j, (name, d) in enumerate(subset.groupby("hash", sort=False)):
            ax.bar(data=d, x=j, label=hashfn_name(name), height="throughput")

    # legend in top right plot
    axs[0][1].legend(loc="best", ncol=2, borderpad=0.4, labelspacing=0.2, handlelength=1.5, handletextpad=0.3, columnspacing=1.0)

    # savefig does not create missing directories.
    os.makedirs("plots", exist_ok=True)
    fig.savefig(f"plots/{outname}.pdf", bbox_inches="tight", dpi=300)
    fig.savefig(f"plots/{outname}.pgf", bbox_inches="tight", dpi=300)
        
# Render one figure per compiler (g++ first, then clang++).
for compiler_df, out_stem in (
    (th_gcc_df, "throughput_hash_gcc"),
    (th_clang_df, "throughput_hash_clang"),
):
    plot_throughput(compiler_df, out_stem)