In [None]:
import seaborn as sns

from zarr_benchmarks.parse_json_for_plots import get_benchmarks_dataframe

In [None]:
# Map each benchmarked package to the JSON results file of its test run
# (all three files come from the same `full_test_run_1` directory).
package_paths_dict = dict(
    zarr_python_2="../../data/json/full_test_run_1/0002_zarr-python-v2.json",
    zarr_python_3="../../data/json/full_test_run_1/0003_zarr-python-v3.json",
    tensorstore="../../data/json/full_test_run_1/0004_tensorstore.json",
)

# Combine the per-package results into a single dataframe for plotting.
benchmarks_df = get_benchmarks_dataframe(package_paths_dict)

In [None]:
# Preview the first five rows of the loaded benchmark table.
benchmarks_df.head(n=5)

In [None]:
# Keep only the runs that share one fixed compression configuration
# (blosc-zstd, shuffle enabled, compression level 3); the chunk-size plots
# further down are drawn from this subset.
chunk_benchmarks = benchmarks_df.loc[
    (benchmarks_df["compressor"] == "blosc-zstd")
    & (benchmarks_df["compression_level"] == 3)
    & (benchmarks_df["blosc_shuffle"] == "shuffle")
]

In [None]:
# Preview the first five rows of the filtered subset.
chunk_benchmarks.head(n=5)

# Chunk size

In [None]:
# Split the filtered runs by benchmark group so that read and write
# timings can be plotted separately.
read_benchmarks = chunk_benchmarks.loc[chunk_benchmarks["group"] == "read"]
write_benchmarks = chunk_benchmarks.loc[chunk_benchmarks["group"] == "write"]

In [None]:
# Compression ratio against chunk size, one panel per package.
# Observation: larger chunk sizes compress worse
# (the expected jumps at 64/128 don't show up?)
graph = sns.relplot(
    height=4,
    aspect=1.5,
    col="package",
    data=read_benchmarks,
    x="chunk_size",
    y="compression_ratio",
)
graph.set_axis_labels(x_var="Chunk size", y_var="Compression ratio")
graph.set_axis_labels("Chunk size", "Compression ratio")

In [None]:
# List the available column names (useful for choosing the x/y columns
# passed to the plots that follow).
read_benchmarks.columns

In [None]:
# Mean read time (`stats.mean`) against chunk size, coloured by package.
# Observation: larger chunk sizes tend to read faster, and tensorstore
# is the fastest.
graph = sns.relplot(
    height=4,
    aspect=1.5,
    hue="package",
    data=read_benchmarks,
    x="chunk_size",
    y="stats.mean",
)
graph.set_axis_labels(x_var="Chunk size", y_var="Read time")

In [None]:
# Mean write time (`stats.mean`) against chunk size, coloured by package.
# Observation: larger chunk sizes tend to write faster.
graph = sns.relplot(
    height=4,
    aspect=1.5,
    hue="package",
    data=write_benchmarks,
    x="chunk_size",
    y="stats.mean",
)
graph.set_axis_labels(x_var="Chunk size", y_var="Write time")