In [None]:
import numpy as np
import blosc2
import time
import plotly.express as px
import pandas as pd

In [None]:
# Matrix shapes (rows, cols) exercised by the first benchmark.
sizes = [
    (100, 100),
    (500, 500),
    (500, 1000),
    (1000, 1000),
    (2000, 2000),
    (3000, 3000),
    (4000, 4000),
    (5000, 5000),
]
# Footprint of each matrix in MB, counting 8 bytes per element.
sizes_mb = [np.prod(dims) * 8 / 2**20 for dims in sizes]
# Elapsed seconds per method; the lists start empty and are filled by the benchmark loop.
results = {name: [] for name in ("numpy", "blosc2")}

In [None]:
# Time one transpose per shape in `sizes`, first with NumPy and then with
# Blosc2, and store the elapsed seconds in `results[method]`.
for method in ["numpy", "blosc2"]:
    # Start from an empty list so that re-running this cell replaces the
    # timings instead of appending to them; appended timings would no longer
    # line up with `sizes` when the results are tabulated.
    results[method] = []
    # Restart the generator from the same seed for each method so both
    # methods transpose exactly the same matrices, run after run.
    rng = np.random.default_rng(0)

    for size in sizes:
        arr = rng.random(size)
        # Build the Blosc2 array only when it is the one being measured; the
        # conversion itself stays outside the timed region.
        arr_b2 = blosc2.asarray(arr) if method == "blosc2" else None

        start_time = time.perf_counter()

        if method == "numpy":
            # np.transpose only returns a view; .copy() forces the data to be
            # physically rearranged so there is real work to measure.
            np.transpose(arr).copy()
        elif method == "blosc2":
            blosc2.transpose(arr_b2)

        end_time = time.perf_counter()
        time_b = end_time - start_time

        print(f"{method}: shape={size}, Performance = {time_b:.6f} s")
        results[method].append(time_b)

In [None]:
# Wide table of the first benchmark: one row per matrix size, one timing
# column per method.
timing_columns = {
    "Matrix Size (MB)": sizes_mb,
    "NumPy Time (s)": results["numpy"],
    "Blosc2 Time (s)": results["blosc2"],
}
df = pd.DataFrame(timing_columns)

# Passing a list for `y` draws one line per timing column; the "value" and
# "variable" labels rename the axis and legend that plotly generates for it.
fig = px.line(
    df,
    x="Matrix Size (MB)",
    y=["NumPy Time (s)", "Blosc2 Time (s)"],
    title="Performance of Matrix Transposition (NumPy vs Blosc2)",
    labels={"value": "Time (s)", "variable": "Method"},
    markers=True,
)

fig.show()

In [None]:
%%time
# Second benchmark: transpose time versus matrix size for NumPy and for
# Blosc2 with several chunk shapes (None leaves the chunking to Blosc2).
# NOTE: the largest matrix alone (17000 x 17000 float64) takes over 2 GB.
shapes = [
    (100, 100), (2000, 2000), (3000, 3000), (4000, 4000), (3000, 7000),
    (5000, 5000), (6000, 6000), (7000, 7000), (8000, 8000), (6000, 12000),
    (9000, 9000), (10000, 10000),
    (10500, 10500), (11000, 11000), (11500, 11500), (12000, 12000),
    (12500, 12500), (13000, 13000), (13500, 13500), (14000, 14000),
    (14500, 14500), (15000, 15000), (15500, 15500), (16000, 16000),
    (16500, 16500), (17000, 17000)
]
chunkshapes = [None, (150, 300), (200, 500), (500, 200), (1000, 1000)]

# Long-format columns for the plot: one entry per (matrix, method) pair.
# The size column is deliberately not called `sizes`: that name holds the
# shape tuples used by the first benchmark, and overwriting it with floats
# would break that cell if it were re-run afterwards.
bench_sizes_mb = []
time_total = []
chunk_labels = []

for shape in shapes:
    matrix_np = np.linspace(0, 1, np.prod(shape)).reshape(shape)
    # `nbytes` is a plain Python int, so the size cannot overflow the way a
    # fixed-width `np.prod(shape) * 8` can where NumPy's default int is 32-bit.
    size_mb = matrix_np.nbytes / (2 ** 20)

    t0 = time.perf_counter()
    result_numpy = np.transpose(matrix_np).copy()
    numpy_time = time.perf_counter() - t0
    # Release the copy right away: it is not needed again, and keeping it
    # would hold a second full-size matrix in memory during the Blosc2 runs.
    del result_numpy

    time_total.append(numpy_time)
    bench_sizes_mb.append(size_mb)
    chunk_labels.append("NumPy")

    print(f"NumPy:  Shape={shape}, Time = {numpy_time:.6f} s")

    for chunk in chunkshapes:
        # Conversion to a Blosc2 array happens outside the timed region.
        matrix_blosc2 = blosc2.asarray(matrix_np, chunks=chunk)

        t0 = time.perf_counter()
        result_blosc2 = blosc2.transpose(matrix_blosc2)
        blosc2_time = time.perf_counter() - t0
        # Drop the result after the timer has stopped so that freeing it is
        # not charged to the next measurement.
        del result_blosc2

        bench_sizes_mb.append(size_mb)
        time_total.append(blosc2_time)
        chunk_labels.append(f"{chunk[0]}x{chunk[1]}" if chunk else "Auto")

        print(f"Blosc2: Shape={shape}, Chunks = {matrix_blosc2.chunks}, Time = {blosc2_time:.6f} s")

df = pd.DataFrame({
    "Matrix Size (MB)": bench_sizes_mb,
    "Time (s)": time_total,
    "Chunk Shape": chunk_labels
})

# One line per "Chunk Shape" value. No `labels` mapping is passed: the axis
# and legend titles come straight from the column names, and the former
# "value"/"variable" keys did not match any column of this long-format table.
fig = px.line(df,
              x="Matrix Size (MB)",
              y="Time (s)",
              color="Chunk Shape",
              title="Performance of Matrix Transposition (Blosc2 vs NumPy)",
              markers=True)
fig.show()