In [None]:
import re

import pandas as pd
from pathlib import Path
from ploting import *

In [None]:
# Base directory; the cells below read the output files from its "data" subfolder.
root_path = "guyot"

In [None]:
# Matches the "apply - inverse mass - ..." row of a pipe-separated table.
#   group(1): integer column   (stored below as "timesteps")
#   group(2): decimal column   (stored below as "time")
# Raw string: "\|", "\s" and "\d" are regex escapes, not valid Python string escapes.
time_re = re.compile(r"apply - inverse mass -.*\|\s+(\d+)\s+\|\s+(\d+(\.\d+)?)")

In [None]:
# Captures the integer that follows "::iterations:" on a "DEAL:" log line.
# Raw string so that "\s" and "\d" reach the regex engine without relying on
# Python passing unknown string escapes through.
iter_re = re.compile(r"DEAL:.*::iterations:\s*(\d+)")

In [None]:
# Matches a "[<int>|<int>|" marker; get_iters parses the text after the match
# as comma-separated iteration counts.
# Raw string so that "\[", "\d" and "\|" are not treated as Python string escapes.
batch_iter_re = re.compile(r"\[(\d+)\|(\d+)\|")

In [None]:
def config_data(filename: str):
    """Parse a ``key_value-key_value-....out`` file name into a ``{key: value}`` dict.

    Every ``.out`` occurrence is removed, the remainder is split on ``-`` and
    each piece is split once at its first ``_``. Values are kept as strings.

    Raises:
        ValueError: if a piece contains no ``_``.
    """
    stem = filename.replace(".out", "")
    config = {}
    for piece in stem.split("-"):
        key, value = piece.split("_", maxsplit=1)
        config[key] = value
    return config

In [None]:
# Build one record per output file that contains the inverse-mass timing row:
# the configuration encoded in the file name plus the two numbers captured by time_re.
data = []
for file in Path(f"{root_path}/data").iterdir():
    if not file.is_file():
        continue  # a sub-directory cannot be read as text
    # read_text() opens and closes the file itself, so no handle name is bound
    # at notebook scope (a handle called ``input`` would shadow the builtin).
    if t_m := time_re.search(file.read_text()):
        data.append({**config_data(file.name),
                     "time": t_m.group(2), "timesteps": t_m.group(1)})
# Everything parsed so far is a string: cast the numeric columns, then drop
# the listed configuration columns.
df = (
    pd.DataFrame(data)
    .astype({"dim": int, "ncells": int, "degree": int, "timesteps": int, "time": float})
    .drop(columns=["cpu", "dryrun", "fmt", "refinement", "dim", "batchsize", "exec", "gpu"])
)
df

In [None]:
def get_iters(testcase, refinement, degree):
    """Collect solver iteration counts for one configuration.

    Scans ``{root_path}/data`` for files whose name contains the exact tokens
    ``testcase_<testcase>``, ``refinement_<refinement>`` and
    ``degree_<degree>``. File names are ``-``-separated ``key_value`` pairs
    ending in ``.out``, the same layout ``config_data`` parses.

    Args:
        testcase: value of the ``testcase`` token to select.
        refinement: value of the ``refinement`` token to select.
        degree: value of the ``degree`` token to select.

    Returns:
        ``(iters, timesteps)``. ``iters`` is a DataFrame with an optional
        ``"matrix"`` column (integers matched by ``iter_re``) and an optional
        ``"batch"`` column (comma-separated integers following a
        ``batch_iter_re`` match). When several files match, a later file
        replaces the column(s) it provides. ``timesteps`` is the first group
        of ``time_re`` in the last matching file, or 1 when that file has no
        such row or when no file matched at all.
    """
    wanted = {f"testcase_{testcase}", f"refinement_{refinement}", f"degree_{degree}"}
    data = dict()
    # Bound before the loop so the return below is valid even if nothing matches.
    timesteps = 1
    for file in Path(f"{root_path}/data").iterdir():
        # Compare whole tokens: a substring test would let "degree_5" also
        # select "degree_50" (likewise for testcase and refinement).
        tokens = set(file.name.replace(".out", "").split("-"))
        if not wanted <= tokens:
            continue

        # Read the file once; the regex searches and the per-line scan share it.
        with open(file, "r") as handle:
            lines = handle.readlines()
        text = "".join(lines)

        matrix_iters = [int(i_m.group(1)) for i_m in iter_re.finditer(text)]

        batch_iters = []
        for line in lines:
            if i_m := batch_iter_re.search(line):
                # Drop the line ending first, then the single closing character
                # after the counts, so a final line without a trailing newline
                # is parsed the same way.
                # NOTE(review): assumes the line ends with one closing
                # character (e.g. "]") right after the last count — confirm.
                counts = line[i_m.end():].rstrip("\n")[:-1]
                batch_iters.extend(map(int, counts.split(",")))

        t_m = time_re.search(text)
        timesteps = int(t_m.group(1)) if t_m else 1

        if matrix_iters:
            data["matrix"] = pd.Series(matrix_iters)
        if batch_iters:
            data["batch"] = pd.Series(batch_iters)
    return pd.DataFrame(data), timesteps

In [None]:
# Iteration statistics for testcase 1, refinement 3, degree 5.
iters, timesteps = get_iters(testcase=1, refinement=3, degree=5)
print(iters["matrix"].mean())
print(iters["batch"].describe())

In [None]:
# Histogram of the batch iteration counts, each sample weighted by
# 1 / timesteps, with the mean full-matrix iteration count marked in red.
fig, ax = plt.subplots()
# The frame pads the shorter column with NaN (which turns it into floats);
# recover the integer counts so range() and the weights length are valid.
batch = iters.batch.dropna().astype(int)
# One unit-wide bin per integer value. The last histogram bin is closed on
# both sides, so the edges must run to max + 1; stopping at max would merge
# the two largest values into a single bar.
bin_edges = range(batch.min(), batch.max() + 2)
# Series.hist takes the target axes as ``ax``; ``axes`` would be forwarded to
# matplotlib as an artist property instead of selecting where to draw.
batch.hist(ax=ax, bins=bin_edges, weights=[1 / timesteps] * len(batch),
           align="left")
# Label the line itself so the legend does not depend on artist order.
ax.axvline(iters.matrix.mean(), c='r', label="Full matrix")
ax.set_title("Batch Iteration Distribution")
ax.legend()

In [None]:
# Time divided by the number of timesteps and by the number of cells.
df["time_normalized"] = df["time"].div(df["timesteps"]).div(df["ncells"])
df

In [None]:
# Throughput in degrees of freedom per second.
# "time" is accumulated over "timesteps" steps (time_normalized divides it by
# timesteps for that reason), so one step takes time / timesteps seconds and
# the rate is dofs / (time / timesteps) = dofs * timesteps / time. The step
# count therefore has to multiply; dividing by it understated the rate by
# a factor of timesteps**2.
# NOTE(review): 4 * (degree + 1)**2 is taken as the unknowns per cell
# (presumably 4 components on a 2D tensor-product element) — confirm.
df["dofs/s"] = (df["ncells"] * (4 * (df["degree"] + 1) ** 2)) * df["timesteps"] / df["time"]
df

In [None]:
# Split the measurements by test case; the values are strings because they
# come straight from the file name and are never cast.
df0, df1 = (df[df["testcase"] == case] for case in ("0", "1"))

In [None]:
# dofs/s of test case 1 indexed by (ncells, degree, solver); plot the degree-5
# columns (one line per solver) against ncells on log-log axes.
index_levels = ["ncells", "degree", "solver"]
t1 = df1.set_index(index_levels)["dofs/s"].sort_index()
by_degree_and_solver = t1.unstack(level=[1, 2])
by_degree_and_solver[5].plot(logx=True, logy=True)

In [None]:
# Normalized time per test case, indexed by (ncells, degree, solver) and sorted.
t1, t0 = (
    frame.set_index(["ncells", "degree", "solver"])["time_normalized"].sort_index()
    for frame in (df1, df0)
)

In [None]:
# Degree-5 slice of t1: rows are ncells, columns are solvers.
t1.unstack(level=[1, 2])[5]

In [None]:
# Test case 1: normalized time vs. ncells, one log-log panel per degree (3, 5, 7).
fig, axes = plt.subplots(1, 3)
data = t1.unstack([1, 2])
for panel, order in zip(axes, (3, 5, 7)):
    data[order].plot(ax=panel, logx=True, logy=True)
    panel.set_title(f"P{order}")

In [None]:
# Test case 0: normalized time vs. ncells, one log-log panel per degree (3, 5, 7).
fig, axes = plt.subplots(1, 3)
data = t0.unstack([1, 2])
for panel, order in zip(axes, (3, 5, 7)):
    data[order].plot(ax=panel, logx=True, logy=True)
    panel.set_title(f"P{order}")

In [None]:
# Scratch check: columns built from Series of unequal length are aligned on
# the index, so the shorter one ("a") is padded with NaN.
a, b = pd.Series([1, 2, 3]), pd.Series([4, 5, 6, 7])
p = pd.DataFrame(dict(a=a, b=b))
p

In [None]:
# Show the column labels of the scratch frame.
p.columns