# Imports

In [None]:
import numpy as np
import timeit
import plotly.express as px
import pandas as pd
from time import process_time

# Calculate the times

In [None]:
def matmul(size: int) -> tuple[int, float, float]:
    """
    Time one ``size`` x ``size`` matrix product with a Python loop and with NumPy.

    Both variants are executed exactly once through ``timeit``. The setup code
    is run separately for each measurement, so the two timings operate on
    independently drawn random matrices.

    :param int size: Edge length of the square matrices
    :returns: Tuple ``(size, loop_time, numpy_time)``; both times are in seconds
    """
    # Setup is passed to timeit as source text, so it is excluded from the timing.
    setup_code = f"""import numpy as np
size = {size}
x = np.random.randn({size}, {size})
y = np.random.randn({size}, {size})
res = np.zeros(({size}, {size}))"""

    # Naive triple loop; timeit re-indents the statement, so only the relative
    # indentation inside this string matters.
    loop_stmt = """for i in range(size):
        for j in range(size):
            for k in range(size):
                res[i][j] += x[i][k] * y[k][j]"""
    loop_seconds = timeit.timeit(loop_stmt, setup=setup_code, number=1)

    numpy_stmt = """res2 = x @ y"""
    numpy_seconds = timeit.timeit(numpy_stmt, setup=setup_code, number=1)

    return size, loop_seconds, numpy_seconds

In [None]:
reps = 10
num = 10

# Matrix edge lengths that are benchmarked: 0, 1, ..., num - 1.
sizes = list(range(num))

# One row per repetition, one column per matrix size. Every cell is written
# in the loop below, so an uninitialised buffer is sufficient.
l_times = np.empty((reps, num))
n_times = np.empty((reps, num))

for i in range(reps):
    for siz in sizes:
        _, l_times[i, siz], n_times[i, siz] = matmul(size=siz)

# Aggregate over the repetitions (axis 0) to get one value per matrix size.
l_mean = l_times.mean(axis=0)
n_mean = n_times.mean(axis=0)
l_std = l_times.std(axis=0)
n_std = n_times.std(axis=0)

# Plot Time Differences

In [None]:
# Build a wide table (one column per method) and reshape it to long format:
# one row per (size, method) with the mean time in "value".
wide = {"size": sizes, "loop": l_mean, "numpy": n_mean}
df = pd.DataFrame(wide).melt(id_vars="size", value_vars=["loop", "numpy"])

# The melted frame lists the "loop" rows before the "numpy" rows, so the
# standard deviations are appended in that same order.
df["e"] = np.append(l_std, n_std)

# Linear-scale comparison of both methods with error bars.
px.scatter(
    data_frame=df,
    x="size",
    y="value",
    error_y="e",
    color="variable",
    height=800,
    width=1500,
)

In [None]:
# Same comparison as the linear plot, but with a logarithmic y-axis.
px.scatter(
    data_frame=df,
    x="size",
    y="value",
    error_y="e",
    color="variable",
    log_y=True,
    height=800,
    width=1500,
)

# Save and Load Dataframe

In [None]:
# df.to_json("../data/dataframe_200_32.json")

In [None]:
# df_load = pd.read_json("../data/dataframe_200_32.json")

# Plot the loaded Dataframe

In [None]:
# px.scatter(
#     df_load,
#     x="size",
#     y="value",
#     color="variable",
#     log_y=True,
#     error_y="e",
#     width=1500,
#     height=800,
# )

# Benchmark Only a Single Operation

In [None]:
def loop_time(size: int) -> tuple[int, float]:
    """
    Time one ``size`` x ``size`` matrix product computed with a Python triple loop.

    The statement is executed exactly once through ``timeit``; creating the
    random input matrices happens in the setup and is not part of the timing.

    :param int size: Edge length of the square matrices
    :returns: Tuple ``(size, seconds)`` with the measured time in seconds
    """
    setup_code = f"""import numpy as np
size = {size}
x = np.random.randn({size}, {size})
y = np.random.randn({size}, {size})
res = np.zeros(({size}, {size}))"""

    # timeit re-indents the statement, so only the relative indentation
    # inside this string matters.
    loop = """for i in range(size):
        for j in range(size):
            for k in range(size):
                res[i][j] += x[i][k] * y[k][j]"""

    # Named `elapsed` rather than `loop_time` so the function name is not shadowed.
    elapsed = timeit.timeit(loop, setup=setup_code, number=1)

    return size, elapsed


def bench_loop(size: int, reps: int) -> pd.DataFrame:
    """
    Benchmark the Python-loop matrix product for edge lengths 0 to ``size - 1``.

    Every edge length is timed ``reps`` times with ``loop_time``; the mean and
    the standard deviation over these repetitions are reported per edge length.

    :param int size: Number of edge lengths to benchmark (0, 1, ..., size - 1)
    :param int reps: Number of repetitions per edge length, at least 1
    :returns: DataFrame with the columns ``size``, ``time`` (mean) and ``deviation`` (standard deviation)
    :raises ValueError: If ``reps`` is smaller than 1
    """
    if reps < 1:
        raise ValueError("reps must be at least 1")

    sizes = list(range(size))

    # One row per repetition, one column per edge length; every cell is
    # written below, so the buffer does not need to be initialised.
    loop_times = np.empty((reps, size))

    for rep in range(reps):
        for siz in sizes:
            # Time the current edge length `siz`, not the upper bound `size`.
            _, loop_times[rep, siz] = loop_time(siz)

    return pd.DataFrame(
        {
            "size": sizes,
            "time": loop_times.mean(axis=0),
            "deviation": loop_times.std(axis=0),
        }
    )

In [None]:
def numpy_time(size: int) -> tuple[int, float]:
    """
    Time one ``size`` x ``size`` matrix product computed with NumPy's ``@`` operator.

    The statement is executed exactly once through ``timeit``; creating the
    random input matrices happens in the setup and is not part of the timing.

    :param int size: Edge length of the square matrices
    :returns: Tuple ``(size, seconds)`` with the measured time in seconds
    """
    setup_code = f"""import numpy as np
size = {size}
x = np.random.randn({size}, {size})
y = np.random.randn({size}, {size})"""

    stmt = """res2 = x @ y"""

    # Named `elapsed` rather than `numpy_time` so the function name is not shadowed.
    elapsed = timeit.timeit(stmt, setup=setup_code, number=1)

    return size, elapsed


def bench_numpy_old(size: int, reps: int) -> pd.DataFrame:
    """
    Benchmark NumPy's matrix product for edge lengths 0 to ``size - 1``.

    Every edge length is timed ``reps`` times with ``numpy_time``; the mean and
    the standard deviation over these repetitions are reported per edge length.
    A progress line is printed on every 100th repetition (starting with 0).

    :param int size: Number of edge lengths to benchmark (0, 1, ..., size - 1)
    :param int reps: Number of repetitions per edge length, at least 1
    :returns: DataFrame with the columns ``size``, ``time`` (mean) and ``deviation`` (standard deviation)
    :raises ValueError: If ``reps`` is smaller than 1
    """
    if reps < 1:
        raise ValueError("reps must be at least 1")

    sizes = list(range(size))

    # One row per repetition, one column per edge length; every cell is
    # written below, so the buffer does not need to be initialised.
    numpy_times = np.empty((reps, size))

    for i in range(reps):
        # Report progress once per repetition instead of once per edge length.
        if i % 100 == 0:
            print(f"{i}. Iteration")

        for siz in sizes:
            _, numpy_times[i, siz] = numpy_time(size=siz)

    return pd.DataFrame(
        {
            "size": sizes,
            "time": numpy_times.mean(axis=0),
            "deviation": numpy_times.std(axis=0),
        }
    )

# Calculate the times

In [None]:
# Benchmark edge lengths 0..1999 with 32 repetitions each.
df_numpy = bench_numpy_old(size=2000, reps=32)

# Save and Load the Dataframe

In [None]:
# Write the benchmark table held in df_numpy to a JSON file; the path is
# relative to the current working directory.
df_numpy.to_json("../data/df_numpy_2000_32.json")

In [None]:
# Replace df_numpy with a table read back from a JSON file.
# NOTE(review): this reads "df_numpy_1000_32.json", while the save cell above
# writes "df_numpy_2000_32.json" — confirm which run is meant to be plotted.
df_numpy = pd.read_json("../data/df_numpy_1000_32.json")

# Plot the times

In [None]:
# Linear-scale scatter of "time" over "size" with "deviation" as error bars.
px.scatter(
    data_frame=df_numpy,
    x="size",
    y="time",
    error_y="deviation",
    height=800,
    width=2200,
    # range_y=([0, df_numpy["deviation"].max()]),
)

In [None]:
# Same data as the linear plot, drawn with a logarithmic y-axis.
px.scatter(
    data_frame=df_numpy,
    x="size",
    y="time",
    error_y="deviation",
    log_y=True,
    height=800,
    width=1200,
)

# Numpy Benchmark v2

In [None]:
def bench_numpy(size: int, iterations: int) -> tuple[np.ndarray, np.ndarray]:
    """
    Measure the time NumPy needs to multiply square arrays of edge lengths 1 to size.

    For every edge length, two freshly drawn random matrices are multiplied
    ``iterations`` times; each multiplication is timed with ``process_time``.

    :param int size: Largest edge length for which the time is measured
    :param int iterations: Number of measurements that are averaged per edge length
    :returns: Tuple of two arrays (means, standard deviations); index ``i`` belongs to edge length ``i + 1``
    """
    means = np.zeros(size)
    deviations = np.zeros(size)

    for idx in range(size):
        edge = idx + 1
        samples = np.zeros(iterations)

        for run in range(iterations):
            # New inputs for every run; drawing them is not part of the timing.
            x = np.random.randn(edge, edge)
            y = np.random.randn(edge, edge)

            started = process_time()
            _ = x @ y
            stopped = process_time()

            samples[run] += stopped - started

        means[idx] += np.mean(samples)
        deviations[idx] += np.std(samples)

    return (means, deviations)

In [None]:
# Benchmark edge lengths 1..500 with 100 measurements each.
means, deviations = bench_numpy(size=500, iterations=100)

# Entry i of the results belongs to edge length i + 1.
edge_lengths = np.arange(1, len(means) + 1)
df = pd.DataFrame({"size": edge_lengths, "time": means, "deviation": deviations})

In [None]:
# Scatter of "time" over "size" with "deviation" as error bars.
px.scatter(
    data_frame=df,
    x="size",
    y="time",
    error_y="deviation",
)