In [None]:
from datetime import datetime
import os
import pandas as pd
import re
import time
from IPython.display import clear_output, display
import pickle

In [None]:
# Notebook configuration: the values below are read by the cells further down.
EXPERIMENT_ID = 'circymxx' # ID of the experiment to export; must equal the name of its folder inside raw/
interval = 60 # seconds to sleep between two refreshes in the monitoring loop

In [None]:
def experiment_to_csv(path, experiment):
    """Collect the per-job log results of one experiment and export them as CSV.

    Reads the tab-separated ``design.csv`` of the experiment folder, keeps the
    rows whose first column (renamed to ``Index``) is >= 0 and, for each of
    them, parses the matching ``.log`` file:

    * ``accuracy`` -- last ``accuracy: <float>`` match in the log (0 if none),
    * ``epochs``   -- number of ``accuracy:`` matches in the log,
    * ``time``     -- last ``Wall clock time is <float> ms`` match, in seconds.

    A row whose log file does not exist gets ``-1`` in all three columns.
    A row whose log exists but contains no wall clock line yet keeps the
    accuracy/epochs parsed so far, gets ``time = -1`` and is counted as not
    finished instead of aborting the whole export with an ``IndexError``.

    The resulting frame is written to ``ex-<design.csv mtime>.csv`` in the
    current working directory.

    Args:
        path: Unused; kept so existing callers keep working.
        experiment: Object exposing a ``path`` attribute that points to the
            experiment folder (for example an ``os.DirEntry``).

    Returns:
        Tuple ``(jobs, csv_name, found, not_existing)`` where ``jobs`` is the
        result frame, ``csv_name`` the name of the written CSV file, ``found``
        the number of logs with a wall clock time and ``not_existing`` the
        number of logs that are missing or still lack a wall clock time.
    """
    jobs = pd.read_csv(f'{experiment.path}/design.csv', sep='\t')
    # The first column holds the job index regardless of its header in the file.
    jobs.columns = ['Index', *jobs.columns[1:]]
    # Work on a copy so the assignments below never target a slice of the raw frame.
    jobs = jobs[jobs.Index >= 0].copy()

    jobs['accuracy'] = 0.0
    jobs['epochs'] = 0
    # Created up front so the column exists (as float) even when no row is processed.
    jobs['time'] = -1.0
    pat = re.compile(r"accuracy: (\d+\.\d+)")
    pat_clock = re.compile(r"Wall clock time is (\d+\.\d+) ms")
    found, not_existing = 0, 0
    for idx, row in jobs.iterrows():
        filename = f"{int(row.Index)}-model{row.model}-epochs{row.max_epochs}-executor_memory{row.executor_memory}-executor_cores{row.executor_cores}.log"

        try:
            with open(f'{experiment.path}/{filename}', 'r') as file:
                content = file.read()
        except FileNotFoundError:
            # No log at all: mark every metric with the -1 sentinel.
            jobs.at[idx, 'time'] = -1.0
            jobs.at[idx, 'accuracy'] = -1.0
            jobs.at[idx, 'epochs'] = -1
            not_existing += 1
            continue

        accuracies = pat.findall(content)
        wall_clocks = pat_clock.findall(content)
        # Convert explicitly: findall returns strings and the column is float.
        jobs.at[idx, 'accuracy'] = float(accuracies[-1]) if accuracies else 0.0
        jobs.at[idx, 'epochs'] = len(accuracies)
        if wall_clocks:
            # The log reports milliseconds; the frame stores seconds.
            jobs.at[idx, 'time'] = float(wall_clocks[-1]) / 1000
            found += 1
        else:
            # Log exists but has no wall clock line yet: treat the job as unfinished.
            jobs.at[idx, 'time'] = -1.0
            not_existing += 1

    # The export is named after the modification time of the design file.
    mtime = os.stat(f'{experiment.path}/design.csv').st_mtime
    csv_name = f'ex-{datetime.fromtimestamp(mtime).strftime("%Y-%m-%d_%H:%M")}.csv'
    jobs.to_csv(csv_name)
    return (jobs, csv_name, found, not_existing)

In [None]:
def fetch_results():
    """Export the configured experiment and load its result table.

    Looks inside ``raw/`` for a non-hidden directory named ``EXPERIMENT_ID``,
    exports it with ``experiment_to_csv``, prints a short status report and
    reads the written CSV back, reduced to the result columns.

    Returns:
        Tuple ``(df, done)``: ``df`` is the frame with the columns
        ``max_epochs``, ``executor_memory``, ``executor_cores``, ``model``,
        ``accuracy`` and ``time``; ``done`` is True when ``experiment_to_csv``
        reported no missing logs.

    Raises:
        AssertionError: If no matching folder exists inside ``raw/``.
        FileNotFoundError: If the experiment folder has no ``design.csv``.
    """
    # Context manager closes the directory iterator deterministically;
    # the DirEntry objects stay usable for their name/path afterwards.
    with os.scandir('raw/') as entries:
        experiments = [
            entry for entry in entries
            if entry.is_dir() and not entry.name.startswith('.') and entry.name == EXPERIMENT_ID
        ]
    assert len(experiments) > 0, f'The folder {EXPERIMENT_ID} does not exist!'

    for experiment in experiments:
        if not os.path.exists(f'{experiment.path}/design.csv'):
            # FileNotFoundError is still an Exception, so broad handlers keep working.
            raise FileNotFoundError(f'{experiment.path}/design.csv not found!')

        _jobs, path, found, not_existing = experiment_to_csv(experiment.path, experiment)
        # Report the folder name rather than the DirEntry repr.
        print(f'Results of {experiment.name} stored in {path}')
        print(f'{found} of the {found + not_existing} expected log files are present. The other experiments are probably missing.\n')
        # Reload from disk so the returned frame matches exactly what was exported.
        df = pd.read_csv(path)
        df = df[['max_epochs', 'executor_memory', 'executor_cores', 'model', 'accuracy', 'time']]

    done = not_existing == 0
    return df, done

In [None]:
# One-off snapshot of the current results; the completion flag is not needed here.
df, _ = fetch_results()
# Bare expression so the notebook renders the frame as the cell output.
df

In [None]:
# Poll the experiment folder until fetch_results() reports that it is done,
# refreshing the displayed table in place on every pass.
done = False
while not done:  # was `while done`, which never entered the loop
    df, done = fetch_results()
    clear_output(wait=True)
    dt = datetime.now()
    print(f'Last update: {dt.strftime("%H:%M:%S")}')
    display(df)
    if not done:
        # Only wait when another poll will follow.
        time.sleep(interval)

# Persist the final result table next to the notebook.
with open(f'{EXPERIMENT_ID}.pickle', 'wb') as handle:
    pickle.dump(df, handle, protocol=pickle.HIGHEST_PROTOCOL)