In [4]:
import traceback
import pandas as pd
from tensorboard.backend.event_processing.event_accumulator import EventAccumulator

# Extraction function
def tflog2pandas(path):
    """Read all scalar series of a TensorBoard log into one long-format frame.

    Parameters
    ----------
    path : str
        Location handed unchanged to ``EventAccumulator``.

    Returns
    -------
    pandas.DataFrame
        One row per scalar event with the columns ``metric`` (the scalar
        tag), ``value`` and ``step``. The index restarts at 0 for every
        tag. If nothing could be read, an empty frame with the same three
        columns is returned.

    Notes
    -----
    Any exception raised while reading is printed (message plus traceback)
    and swallowed; rows of the tags that were read before the failure are
    still returned.
    """
    # One frame per tag, concatenated once at the end. Growing a frame with
    # pd.concat inside the loop copies all previous rows on every iteration,
    # and seeding it with an empty frame makes the result dtypes depend on
    # how the installed pandas treats empty entries during concatenation.
    frames = []
    try:
        event_acc = EventAccumulator(path)
        event_acc.Reload()
        for tag in event_acc.Tags()["scalars"]:
            event_list = event_acc.Scalars(tag)
            steps = [event.step for event in event_list]
            if not steps:
                # A tag without events contributes no rows.
                continue
            values = [event.value for event in event_list]
            frames.append(
                pd.DataFrame(
                    {"metric": [tag] * len(steps), "value": values, "step": steps}
                )
            )
    # Dirty catch of DataLossError
    except Exception:
        print("Event file possibly corrupt: {}".format(path))
        traceback.print_exc()

    if not frames:
        return pd.DataFrame({"metric": [], "value": [], "step": []})
    return pd.concat(frames)


In [5]:
def pivot_df(df):
    """Reshape a long-format metric frame into one column per metric.

    ``df`` must provide the columns ``step``, ``metric`` and ``value``.
    The result is indexed by ``step``, has one column per distinct
    ``metric`` entry holding the matching ``value``, and its column index
    is flat and unnamed.
    """
    wide = df.pivot(index="step", columns="metric", values=["value"])
    # The pivot yields two column levels (the "value" label on top of the
    # metric names); keep only the metric names and clear the level name.
    wide.columns = wide.columns.droplevel(0).rename(None)
    return wide

In [6]:
import os
import fnmatch
import json

# Root folder that is searched recursively for finished runs.
RUNS_ROOT = "G:\\Meine Ablage\\Masterarbeit\\fantastic-umbrella\\finished_runs\\04_mod_runs"

# Every directory that contains a run_overview.json is treated as one run:
# its TensorBoard scalars are exported to CSV and the overview is written
# back, extended by summary statistics, as run_overview_extended.json.
for path, dirs, files in os.walk(RUNS_ROOT):
    for file in fnmatch.filter(files, 'run_overview.json'):
        # Paths are built with os.path.join (as for the JSON files below)
        # instead of hard-coded "\\" separators.
        df = tflog2pandas(os.path.join(path, "glue_no_trainer"))
        df = pivot_df(df)
        df.index = df.index.astype("int")
        # Item assignment rather than attribute assignment: `df.epoch = ...`
        # only targets the column as long as it already exists.
        df["epoch"] = df["epoch"].astype("int")
        df.to_csv(os.path.join(path, "tensorboard_data.csv"))

        file_path = os.path.abspath(os.path.join(path, file))
        print(f'Found file at: {file_path}')
        with open(file_path, 'r') as f:
            d = json.load(f)

        # Best score per evaluation metric; None when the run did not log it.
        for column, key in (
            ("accuracy", "max_acc"),
            ("matthews_correlation", "max_matthews"),
        ):
            d[key] = df[column].max() if column in df.columns else None

        # Lowest loss together with the step (index label) and epoch at
        # which it was reached.
        for column, key in (
            ("eval_loss", "min_evalLoss"),
            ("train_loss", "min_trainLoss"),
        ):
            d[key] = df[column].min()
            d[key + "_step"] = int(df[column].idxmin())
            d[key + "_epoch"] = int(df.loc[d[key + "_step"], "epoch"])

        new_file_path = os.path.abspath(os.path.join(path, 'run_overview_extended.json'))
        with open(new_file_path, 'w') as f:
            json.dump(d, f)

Found file at: G:\Meine Ablage\Masterarbeit\fantastic-umbrella\finished_runs\04_mod_runs\run_modded_356_\run_overview.json
Found file at: G:\Meine Ablage\Masterarbeit\fantastic-umbrella\finished_runs\04_mod_runs\run_modded_355_\run_overview.json
Found file at: G:\Meine Ablage\Masterarbeit\fantastic-umbrella\finished_runs\04_mod_runs\run_modded_354_\run_overview.json
Found file at: G:\Meine Ablage\Masterarbeit\fantastic-umbrella\finished_runs\04_mod_runs\run_modded_353_\run_overview.json
Found file at: G:\Meine Ablage\Masterarbeit\fantastic-umbrella\finished_runs\04_mod_runs\run_modded_352_\run_overview.json
Found file at: G:\Meine Ablage\Masterarbeit\fantastic-umbrella\finished_runs\04_mod_runs\run_modded_351_\run_overview.json
Found file at: G:\Meine Ablage\Masterarbeit\fantastic-umbrella\finished_runs\04_mod_runs\run_modded_350_\run_overview.json
Found file at: G:\Meine Ablage\Masterarbeit\fantastic-umbrella\finished_runs\04_mod_runs\run_modded_349_\run_overview.json
Found file at: G