In [None]:
import ast
import re
from pathlib import Path
from typing import Dict

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns

In [None]:

def read_mnist_log_file(path: Path):
    lines = path.read_text().splitlines()
    progress_lines = [l for l in lines if "fit progress: (" in l]
    entries = []
    for l in progress_lines:
        x = l.split("fit progress: ")[-1]
        round, loss, metrics, time = eval(x)
        entries.append(
            {
                "round": round,
                "loss": loss,
                "metrics": metrics,
                "accuracy": metrics.get("accuracy"),
                "time_since_start": time,
                "time": time if len(entries) == 0 else (time - entries[-1]["time_since_start"])
            }
        )
    times_agg_eval = []
    for idx, line in enumerate(lines):
        if idx + 1 == len(lines):
            break
        next_line = lines[idx + 1]
        if "fit_round received" not in line:
            continue
        if "fit progress: (" in next_line:
            t_start = pd.to_datetime(" ".join(line.split(" ")[2:4]))
            t_end = pd.to_datetime(" ".join(next_line.split(" ")[2:4]))
            times_agg_eval.append((t_end - t_start).total_seconds())
    #"time_agg_eval": time_agg_eval,
    df = pd.DataFrame.from_records(entries)
    df["time_agg_eval"] = times_agg_eval
    new_dtypes = {"time": float, "loss": float, "accuracy": float,
                  "time_since_start": float, "round": int,
                  "time_agg_eval": float
                  }
    if not df.empty:
        df = df.astype(new_dtypes)
    return df


def read_femnist_log_file(f_err: Path, f_out: Path):
    out_lines = f_out.read_text().splitlines()
    eval_entries = []
    for l in out_lines:
        matches = re.findall(r"EvaluateRes([^\)]+)", l)
        for m in matches:
            print(eval(f"dict{m})"))

    return None

#df = read_femnist_log_file(Path("/Users/andreas/workspace/thesis-code/out/flower-logs/fedless_femnist_100_100_10_8162.err"),
#                           Path("/Users/andreas/workspace/thesis-code/out/flower-logs/fedless_femnist_100_100_10_8162.out"))
#df

In [None]:
files = []
dfs = []
LOG_FOLDER = Path("/Users/andreas/workspace/thesis-code/out/flower-logs")
for f in LOG_FOLDER.glob("fedless_*.err"):
    if (len(f.name.split("_"))) > 6:  # Local Client Log
        continue
    _, dataset, clients_in_round, clients_total, local_epochs, seed = f.name.split("_")
    seed = seed.split(".")[0]
    if dataset == "mnist":  # All required data lies in .err file
        logs_df = read_mnist_log_file(f)
    elif dataset == "femnist":
        logs_df = read_femnist_log_file(f_err=f, f_out=f.with_suffix(".out"))

    if logs_df.empty:
        continue

    index = pd.MultiIndex.from_tuples(
        [(
            dataset,
            clients_in_round,
            clients_total,
            local_epochs,
            seed
        )] * len(logs_df),
        names=[
            "dataset",
            "clients_in_round",
            "clients_total",
            "local_epochs",
            "seed"
        ]
    )
    #print(index)
    df = pd.DataFrame(logs_df.values, index=index, columns=logs_df.columns)  # .reset_index()
    df = df.astype(logs_df.dtypes)

    integer_index_levels = [1, 2, 3]
    for i in integer_index_levels:
        df.index = df.index.set_levels(df.index.levels[i].astype(int), level=i)
    dfs.append(df)
dfs = pd.concat(dfs)
dfs = dfs.sort_index()

In [None]:
#dfs.loc[{"seed": 25738}]
#mnist_dfs = dfs.loc[("mnist", "100", "100", "10")]
mnist_dfs = dfs.loc[("mnist", 100, 100, 10)]
mnist_dfs[mnist_dfs["accuracy"] >= 0.99].groupby("seed").min("round")["round"].mean()
#sns.lineplot(x="round", y="accuracy", data=mnist_dfs)

In [None]:
df_ = dfs.loc[("mnist", slice(75, 100), 100, slice(1, 100)), :]
(
    df_[df_["accuracy"] >= 0.99].groupby(by=["seed", "clients_in_round", "local_epochs"]).min("round")
        .groupby(["clients_in_round", "local_epochs"])
        .mean()
)

In [None]:
df_ = dfs.loc[("mnist", 100, 100, slice(1, 100)), :]
df_ = df_[df_["accuracy"] >= 0.99].groupby(level=[1, 2, 3, 4]).min("round")
df_ = df_.groupby(level=[0, 2]).mean()
sns.barplot(y="round", x="local_epochs", data=df_.reset_index()) # hue="local_epochs",

In [None]:
df_ = dfs.loc[("mnist", 100, 100, slice(1, 100)), :]
df_ = df_[df_["accuracy"] >= 0.99].groupby(level=[1, 2, 3, 4]).min("round")
df_ = df_.groupby(level=[0, 2]).mean()
sns.barplot(y="time_since_start", x="local_epochs", data=df_.reset_index())  # hue="local_epochs",

In [None]:
df_ = dfs.loc[("mnist", 100, 100, slice(1, 100)), :]
df_ = df_[df_["round"] > 1]
sns.barplot(y="time", x="local_epochs", data=df_.reset_index())  #  hue="local_epochs"

In [None]:
df_ = dfs.loc[("mnist", 100, 100, slice(1, 100)), :]
df_ = df_[df_["round"] > 1]
df_["time_wo_agg_eval"] = df_["time"] - df_["time_agg_eval"]
df_[["time_wo_agg_eval", "time_agg_eval"]].mean(level=[3]).plot.bar(stacked=True)