In [1]:
import pandas as pd
import matplotlib
import matplotlib.pyplot as plt

import numpy as np

In [2]:
# Select the PGF backend so figures can also be exported as .pgf files.
matplotlib.use("pgf")

# Global font size used by every figure in this notebook.
matplotlib.rcParams["font.size"] = 8

# Let LaTeX typeset all text; amsmath is made available to the text renderer.
matplotlib.rcParams["text.usetex"] = True
matplotlib.rcParams["text.latex.preamble"] = r"\usepackage{amsmath}"

# PGF export goes through pdflatex with amsfonts loaded.
matplotlib.rcParams["pgf.texsystem"] = "pdflatex"
matplotlib.rcParams["pgf.preamble"] = r"\usepackage{amsfonts}"

# Options that were tried and are currently disabled:
#   "font.family": "serif"
#   "pgf.rcfonts": False

In [3]:
# Load the hash-throughput benchmark results produced with clang++.
th_df = pd.read_csv(
    filepath_or_buffer="throughput_hash/throughput_hash-clang++.csv",
)

In [4]:
# Figure dimensions passed as `figsize` when the plots are created.
fig_width, fig_height = 6, 3

In [5]:
def hashfn_name(h):
    """Translate a hash-function identifier into its display label.

    The identifier is matched by substring against the table below; the first
    entry whose key occurs in ``h`` wins. The table order is significant:
    "fibonacci" is tested before "mult", so an identifier containing both is
    labelled "Fibonacci".

    Returns the label, or "UNKNOWN" when no key occurs in ``h``.
    """
    labels = (
        ("fibonacci", "Fibonacci"),
        ("mult", "Multiplicative"),
        ("murmur", "Murmur3"),
        ("aqua", "AquaHash"),
        ("xxh3", "XXHash"),
        ("city", "CityHash"),
        ("tabulation", "Tabulation"),
        ("meow", "MeowHash"),
    )
    matches = (label for needle, label in labels if needle in h)
    return next(matches, "UNKNOWN")

def reducer_name(r):
    """Return the display label for the reducer identifier ``r``.

    Only the four reducers listed below are known; any other identifier
    raises ``KeyError``.
    """
    display_names = dict(
        do_nothing="No Reduction",
        fastrange64="Fastrange",
        fast_modulo="Fast Modulo",
        modulo="Modulo",
    )
    return display_names[r]

def order(h):
    """Return the integer plotting rank of the hash-function identifier ``h``.

    The identifier is matched by substring, first match wins, and the rank is
    the position of the matching key in the tuple below. "fibonacci" is tested
    before "mult", so an identifier containing both gets rank 0.

    Identifiers that match no key get a rank one past the last known hash.
    The result is therefore always an ``int``: a column built from this
    function never mixes strings with numbers and can always be sorted.
    """
    needles = (
        "fibonacci",
        "mult",
        "murmur",
        "aqua",
        "xxh3",
        "city",
        "tabulation",
        "meow",
    )
    for rank, needle in enumerate(needles):
        if needle in h:
            return rank

    # Unknown hash: sort it after every known one instead of returning a
    # string, which would make the ranks incomparable with each other.
    return len(needles)

In [6]:
# Restrict the benchmark results to one dataset and the hash functions that
# are plotted, then derive the two helper columns used by the plot.
TH_DATASET = "osm_cellids_200M_uint64"
TH_HASHES = {
    "mult_fibonacci_prime64",
    "mult_prime64",
    "murmur_finalizer64",
    "aqua0_64",
    "xxh3_128_low",
    "city64",
    "tabulation_1x255_64",
    "meow64_low",
}

th_df = (
    th_df[(th_df["dataset"] == TH_DATASET) & th_df["hash"].isin(TH_HASHES)]
    # Filtering first means `order` is only evaluated for hashes it knows,
    # and .assign works on a fresh frame rather than a view of the raw data.
    .assign(
        # keys per second = 1e9 ns/s divided by ns per key (vectorised)
        throughput=lambda df: 1e9 / df["nanoseconds_per_key"],
        order=lambda df: df["hash"].map(order),
    )
)
th_df

Unnamed: 0,dataset,numelements,hash,reducer,nanoseconds_total,nanoseconds_per_key,benchmark_repeat_cnt,throughput,order
546,osm_cellids_200M_uint64,200000000,city64,branchless_fast_modulo,642703877,3.213519,10,311185300.0,5
547,osm_cellids_200M_uint64,200000000,city64,do_nothing,412107930,2.06054,10,485309700.0,5
548,osm_cellids_200M_uint64,200000000,city64,fast_modulo,566259512,2.831298,10,353194900.0,5
549,osm_cellids_200M_uint64,200000000,city64,fastrange32,481999815,2.409999,10,414937900.0,5
550,osm_cellids_200M_uint64,200000000,city64,fastrange64,481697527,2.408488,10,415198200.0,5
551,osm_cellids_200M_uint64,200000000,city64,modulo,1527489517,7.637448,10,130933800.0,5
552,osm_cellids_200M_uint64,200000000,meow64_low,branchless_fast_modulo,4222613412,21.113067,10,47364030.0,7
553,osm_cellids_200M_uint64,200000000,meow64_low,do_nothing,3599357901,17.99679,10,55565460.0,7
554,osm_cellids_200M_uint64,200000000,meow64_low,fast_modulo,4063300800,20.316504,10,49221070.0,7
555,osm_cellids_200M_uint64,200000000,meow64_low,fastrange32,3795694916,18.978475,10,52691270.0,7


In [7]:
def plot_throughput(data):
    """Draw a 2x2 grid of bar charts, one panel per reducer, and save it.

    Parameters
    ----------
    data : pandas.DataFrame
        Needs the columns "reducer", "hash", "throughput" and "order".
        Rows whose reducer is not one of the four listed in ``reducers``
        below are ignored.

    Each panel shows one bar per hash function, arranged by the "order"
    column. The figure is written to ``plots/throughput_hash.pdf`` and
    ``plots/throughput_hash.pgf``; the ``plots`` directory is created if it
    does not exist yet.
    """
    import os

    # Panels are filled row by row in exactly this order.
    reducers = ['do_nothing', 'modulo', 'fastrange64', 'fast_modulo']

    fig, axs = plt.subplots(2, 2, figsize=(fig_width, fig_height), sharex=True, sharey=True)

    data = data[data["reducer"].isin(reducers)].copy()
    # A categorical with explicit categories sorts in panel order instead of
    # alphabetically; "order" then fixes the bar order inside each panel.
    data["reducer"] = pd.Categorical(data["reducer"], categories=reducers)
    data = data.sort_values(by=["reducer", "order"], kind="stable")

    # Group by a scalar key (not a one-element list) so the group name is the
    # plain reducer string on every pandas version; with a list key pandas 2
    # yields 1-tuples, which breaks the name lookups below.
    # observed=True skips reducers that are absent from the data.
    panels = data.groupby("reducer", sort=False, observed=True)
    for ax, (reducer, df) in zip(axs.flat, panels):
        ax.set_title(reducer_name(reducer))
        ax.set_ylabel("Keys per Second")
        ax.set_yticks(np.arange(0, 10**9+1, 0.25 * 10**9))
        # Bars are identified through the legend, so hide the x ticks/labels.
        ax.tick_params(
            axis='x',
            which='both',
            top=False,
            bottom=False,
            labelbottom=False)
        ax.grid(linestyle="--", axis="y")

        for j, (name, d) in enumerate(df.groupby("hash", sort=False)):
            ax.bar(data=d, x=j, label=hashfn_name(name), height="throughput")

    # legend in top right plot
    axs[0][1].legend(loc="best", ncol=2, borderpad=0.4, labelspacing=0.2, handlelength=1.5, handletextpad=0.3, columnspacing=1.0)

    # savefig does not create missing directories.
    os.makedirs("plots", exist_ok=True)
    fig.savefig("plots/throughput_hash.pdf", bbox_inches="tight", dpi=300)
    fig.savefig("plots/throughput_hash.pgf", bbox_inches="tight", dpi=300)
        
# Render and save the throughput figure for the filtered benchmark results.
plot_throughput(data=th_df)