In [2]:
#!pip install tensorboard tensorflow pandas

"""
If tensorboard (or another dependency, such as tensorflow or pandas) is not installed,
uncomment the command at the top and re-run. This only needs to be run once per Jupyter kernel.
"""

%load_ext tensorboard

import os

import numpy as np
import pandas as pd
from slugify import slugify
from tensorflow.python.summary.summary_iterator import summary_iterator

In [None]:
"""
Change the --logdir argument below to point to the correct directory; you may want to use an
absolute path if you run into issues.
"""
%tensorboard --logdir ../logging

In [3]:
def logs_to_pandas(path: str) -> pd.DataFrame:
    """Convert a single TensorFlow event log file to a pandas DataFrame.

    Every summary value of every event (except the first event, which is
    skipped) becomes one row holding the value's tag, its ``simple_value``,
    and the step and wall time of the event it belongs to.

    Parameters
    ----------
    path : str
        path to tensorflow log file

    Returns
    -------
    pd.DataFrame
        One row per summary value with the columns ``metric``, ``value``,
        ``step`` and ``wall_time`` (a timestamp parsed from seconds), and a
        fresh ``0..n-1`` index. If the file cannot be read, a message is
        printed and whatever was collected before the error (usually nothing)
        is returned, so the frame may be empty.
    """
    columns = ["metric", "value", "step", "wall_time"]

    # Rows are gathered in a plain list and turned into a DataFrame once at
    # the end. Concatenating inside the loop is quadratic, and starting from
    # an empty frame silently upcasts the integer ``step`` column to float.
    records = []
    try:
        # Read the whole file up front so a corrupt file yields no rows at
        # all rather than a silently truncated run.
        # NOTE(review): the first event is skipped, presumably because it is
        # the file-version header that carries no summary values - confirm.
        events = list(summary_iterator(path))[1:]
        for event in events:
            wall_time = pd.to_datetime(event.wall_time, unit="s")
            records.extend(
                [
                    {
                        "metric": summary_value.tag,
                        "value": summary_value.simple_value,
                        "step": event.step,
                        "wall_time": wall_time,
                    }
                    for summary_value in event.summary.value
                ]
            )
    # Deliberate best-effort catch (e.g. DataLossError on a corrupt file):
    # report the problem and fall through with what has been collected.
    except Exception as e:
        print("Event file possibly corrupt: {}".format(path))
        print(e)
    return pd.DataFrame(records, columns=columns)

In [22]:
d_columns = [
    "start_time",
    "end_time",
    "total_time",
    "accuracy_per_epoch",
    "training_loss_per_epoch",
    "step",
]


def get_job_data(path):
    """Summarise one event log file as the row(s) of its last logged step.

    Parameters
    ----------
    path : str
        path to the tensorflow log file of a single job

    Returns
    -------
    pd.DataFrame
        The row(s) belonging to the highest step, restricted to the columns
        listed in ``d_columns``. ``start_time`` / ``end_time`` are the
        earliest / latest wall time over all complete steps and
        ``total_time`` is their difference in seconds.
    """
    events = logs_to_pandas(path)

    # Metrics of one step are logged at slightly different moments; give every
    # row of a step the earliest wall time so they pivot into a single row.
    events["wall_time"] = events.groupby("step")["wall_time"].transform("min")

    # One row per (step, wall_time) with one column per metric; steps lacking
    # any metric are dropped, and metric tags are slugified into
    # underscore-separated column names.
    per_step = (
        events.pivot_table(index=["step", "wall_time"], columns="metric", values="value")
        .reset_index()
        .dropna()
        .rename(columns=lambda name: slugify(name, separator="_"))
    )
    # Remove the leftover "metric" label from the column index.
    per_step.columns.name = None

    first_seen = per_step["wall_time"].min()
    last_seen = per_step["wall_time"].max()
    per_step = per_step.assign(
        total_time=(last_seen - first_seen).total_seconds(),
        start_time=first_seen,
        end_time=last_seen,
    )

    # Keep only the final step of the run.
    is_last_step = per_step["step"] == per_step["step"].max()
    return per_step.loc[is_last_step, d_columns]

base_path = "../logging/reg_lenet_baseline_20/0"
net_path = "0/Nets.mnist_lenet_Dataset.mnist"
# net_path = "0/Nets.mnist_cnn_Dataset.mnist"

# Walk every job folder in the logging directory and summarise its event file.
# The per-job frames are collected in a list and concatenated once: growing a
# DataFrame inside the loop is quadratic, and seeding it with an empty frame
# degrades the column dtypes.
job_frames = []
for folder in sorted(os.listdir(base_path)):
    run_dir = os.path.join(base_path, folder, net_path)
    if not os.path.isdir(run_dir):
        # Stray entries (hidden files, unfinished jobs) have no log directory.
        continue
    # os.listdir returns entries in arbitrary order; sort so that the same
    # file is picked on every run when a directory holds several of them.
    event_files = sorted(os.listdir(run_dir))
    if not event_files:
        print("No event file found in: {}".format(run_dir))
        continue
    job_frames.append(get_job_data(os.path.join(run_dir, event_files[0])))

if job_frames:
    data = pd.concat(job_frames)
else:
    # Keep the expected columns even when no job produced any data.
    data = pd.DataFrame(columns=d_columns)

# Order the jobs chronologically and give them a clean 0..n-1 index.
data = data.sort_values("end_time").reset_index(drop=True)
data

Unnamed: 0,start_time,end_time,total_time,accuracy_per_epoch,training_loss_per_epoch,step
0,2022-11-01 20:33:27.279617280,2022-11-01 20:41:50.768099584,503.488482,85.239998,0.217292,20.0
1,2022-11-01 20:42:59.100616448,2022-11-01 20:51:24.871062784,505.770446,86.720001,0.206736,20.0
2,2022-11-01 20:52:38.277006592,2022-11-01 21:00:58.619353088,500.342346,86.559998,0.204996,20.0
3,2022-11-01 21:02:10.482034688,2022-11-01 21:10:29.571946752,499.089912,86.480003,0.213674,20.0
4,2022-11-01 21:11:36.981936384,2022-11-01 21:20:14.100646912,517.11871,85.199997,0.215763,20.0


In [24]:
# Final accuracy of every job, in chronological order of job completion.
data["accuracy_per_epoch"]

0    85.239998
1    86.720001
2    86.559998
3    86.480003
4    85.199997
Name: accuracy_per_epoch, dtype: float64

In [14]:
def write_to_sheets(data, path, columns):
    """Write a flat sequence as tab-separated text that can be pasted into a spreadsheet.

    Parameters
    ----------
    data : iterable
        values to write; each one is converted with ``str``
    path : str
        file to (over)write
    columns : int
        number of values per line; a line break is inserted after every
        ``columns`` values and a tab between the values of one line
    """
    pieces = []
    for position, item in enumerate(data):
        offset = position % columns
        if offset:
            # Inside a row: separate from the previous value with a tab.
            pieces.append("\u0009")
        elif position:
            # First value of every row except the very first one.
            pieces.append("\n")
        pieces.append(str(item))

    with open(path, "w") as handle:
        handle.write("".join(pieces))

# Dump the total run time of every job, three values per line.
write_to_sheets(data=data["total_time"].values, path="test.txt", columns=3)




In [17]:
# Export the per-job summary as tab-separated text (header row, no index column).
data.loc[:, ["step", "total_time", "accuracy_per_epoch"]].to_csv(
    "text.txt",
    sep="\t",
    index=False,
)
