In [1]:
import os
import json
import pandas as pd
from hydra_utils import read_logs, get_missing_jobs

# Locating the Data
First things first: to get you oriented, let's check where our results are located. Use this script in combination with your command to see where the results should be if your naming matches the example config file.

In [2]:
# Experiment grid: the seeds, methods and benchmarks of the sweep we want to inspect.
seeds = range(0, 10)
methods = [1, 3, 7]
benchmarks = [4]
base_path = "results"

# Build the command from the grid itself so the two cannot drift apart
# (the hand-written string previously listed method 4 while the grid used 3).
command_str = (
    f"python cli.py 'seed=range({seeds.start},{seeds.stop})' "
    f"method={','.join(str(m) for m in methods)}"
)

# One directory per run, laid out as <base_path>/<method>_<benchmark>/<seed>.
experiment_directories = [
    os.path.join(base_path, f"{m}_{b}", f"{s}")
    for b in benchmarks
    for m in methods
    for s in seeds
]

print(experiment_directories)

['results/1_4/0', 'results/1_4/1', 'results/1_4/2', 'results/1_4/3', 'results/1_4/4', 'results/1_4/5', 'results/1_4/6', 'results/1_4/7', 'results/1_4/8', 'results/1_4/9', 'results/3_4/0', 'results/3_4/1', 'results/3_4/2', 'results/3_4/3', 'results/3_4/4', 'results/3_4/5', 'results/3_4/6', 'results/3_4/7', 'results/3_4/8', 'results/3_4/9', 'results/7_4/0', 'results/7_4/1', 'results/7_4/2', 'results/7_4/3', 'results/7_4/4', 'results/7_4/5', 'results/7_4/6', 'results/7_4/7', 'results/7_4/8', 'results/7_4/9']


# Checking if Everything Ran Successfully
Now that we know the location of our data, we can check if it's complete. For this purpose, you need to define a function that takes a path inside a single run's directory and returns a boolean indicating whether this run is complete (or `None` if the path is not the file it checks).

In [24]:
# We need to specify which file to look into; here we check a 'done.txt' file.
# For other files we return None
# Other options are things like checking if a checkpoint exists or if performance logging has reached a certain point
def job_done(path_str):
    """Tell whether a run finished, judging by its ``done.txt`` marker file.

    Args:
        path_str: Path of a file found in a run directory.

    Returns:
        ``True`` if ``path_str`` is a ``done.txt`` file whose content contains
        "yes", ``False`` if it is a ``done.txt`` file without "yes", and
        ``None`` for any other path.
    """
    if not path_str.endswith("done.txt"):
        return None
    # path_str already points at the marker file itself; joining "done.txt" onto
    # it again would look for ".../done.txt/done.txt" and always fail.
    with open(path_str, "r") as f:
        return "yes" in f.read()

In [25]:
# Hand the results root and our completion check to get_missing_jobs and keep its result.
# NOTE(review): presumably `missing` identifies the runs that did not finish — confirm in hydra_utils.
missing = get_missing_jobs("results", job_done)

results/1_4/0
/Users/theeimer/Documents/git/automl_repo_template/{{cookiecutter.project_slug}}/{{cookiecutter.project_slug}}/results/1_4/0
[]
None
results/1_4/1
/Users/theeimer/Documents/git/automl_repo_template/{{cookiecutter.project_slug}}/{{cookiecutter.project_slug}}/results/1_4/1
[]
None
results/1_4/2
/Users/theeimer/Documents/git/automl_repo_template/{{cookiecutter.project_slug}}/{{cookiecutter.project_slug}}/results/1_4/2
[]
None
results/1_4/3
/Users/theeimer/Documents/git/automl_repo_template/{{cookiecutter.project_slug}}/{{cookiecutter.project_slug}}/results/1_4/3
[]
None
results/1_4/4
/Users/theeimer/Documents/git/automl_repo_template/{{cookiecutter.project_slug}}/{{cookiecutter.project_slug}}/results/1_4/4
[]
None
results/1_4/5
/Users/theeimer/Documents/git/automl_repo_template/{{cookiecutter.project_slug}}/{{cookiecutter.project_slug}}/results/1_4/5
[]
None
results/1_4/6
/Users/theeimer/Documents/git/automl_repo_template/{{cookiecutter.project_slug}}/{{cookiecutter.project_

# Make Runscripts to Rerun Missing Runs
Since it's possible some runs die before finishing, we need to rerun them at times. Here we can generate scripts to do this.

In [12]:
# Write one SLURM batch script per (benchmark, method, seed) combination and
# remember each script name so they can all be submitted together afterwards.
# NOTE(review): this covers every configured combination, not only the runs
# reported as missing — filter here once the structure of `missing` is confirmed.
all_files = []
for b in benchmarks:
    for m in methods:
        for s in seeds:
            filepath = f"missing_method_{m}_benchmark_{b}_seed_{s}.sh"
            command = f"python cli.py seed={s} method={m} benchmark={b}"
            # NOTE(review): the log names carry no seed, so all seeds of one
            # method/benchmark pair share the same .err/.out file — confirm intended.
            slurm_string = f"""#!/bin/bash \n#SBATCH --error={m}_{b}.err \n#SBATCH --job-name=missing \n#SBATCH --mem=10GB \n#SBATCH --output={m}_{b}.out \n#SBATCH --partition=ai,tnt \n#SBATCH --time=1440 \nconda activate my_env"""
            # "w" instead of "a+": re-running this cell must regenerate the script,
            # not append a second header and command to the existing file.
            with open(filepath, "w") as f:
                f.write(slurm_string)
                f.write("\n")
                f.write(command)
                f.write("\n")
            all_files.append(filepath)

# Collect one `sbatch <script>` line per generated script; overwritten on every
# run so the list never accumulates duplicates.
with open("submit_all_missing.sh", "w") as f:
    for file in all_files:
        f.write(f"sbatch {file}")
        f.write("\n")

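# now we could run 'bash submit_all_missing.sh' in the terminal to submit the missing jobs
# (the file is a plain list of sbatch calls without a shebang, so it is not itself a batch script)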

# Loading the Data
There are multiple ways to then work with your data. Here we'll provide you with a way to load it into dataframes, since we assume this is how you'll want to work with experiment data. If this doesn't work for you, it should be easy to configure — come talk to someone!
There are three steps to data loading:
1. Write functions loading all result files you're interested in. In this case, we want to load the performance and emissions
2. Write any post-processing you want done with the loading. Here, we want to delete some fields to save disk space.
3. Tell us where to find your results and watch them load :D

In [None]:
# Here we define how and what to load
# We need to specify which files this function applies to and return None for the rest
# Then we can load it into a pandas dataframe however we want
def read_performance(path):
    """Load a run's performance log.

    Args:
        path: Path of a candidate result file.

    Returns:
        The file parsed by ``pd.read_csv`` when ``path`` exists and ends with
        "performance.csv"; ``None`` for every other path.
    """
    # Guard clause: anything that is not an existing performance file is skipped.
    if not os.path.exists(path) or not path.endswith("performance.csv"):
        return None
    return pd.read_csv(path)
    
def read_emissions(path):
    """Load a run's emissions log into a DataFrame.

    Args:
        path: Path of a candidate result file.

    Returns:
        A ``pandas.DataFrame`` with the file's contents when ``path`` exists and
        ends with "emissions.csv"; ``None`` for every other path.
    """
    if os.path.exists(path) and path.endswith("emissions.csv"):
        # The file is CSV, so it has to be parsed as CSV: json.load raises on it,
        # and the column-dropping post-processing step expects a DataFrame.
        return pd.read_csv(path)
    return None

In [None]:
# In this example, we don't care about all of our emissions data, so we delete some of the columns
def drop_columns(df):
    """Return a copy of ``df`` without the emissions columns we do not need.

    Args:
        df: DataFrame to trim.

    Returns:
        A new DataFrame without "gpu_power" and "country_name". Frames that do
        not contain these columns are returned with all their columns intact.
    """
    # errors="ignore": frames without the emissions columns (e.g. performance
    # data) must pass through instead of raising KeyError.
    df = df.drop(columns=["gpu_power", "country_name"], errors="ignore")
    return df

In [None]:
# Now we can get our data! It will also be saved in a csv file so we won't need to reload it
# Arguments, in order: the results root, the loader functions, the post-processing
# functions, and the name "run_data".
# NOTE(review): presumably "run_data" names the saved csv file — confirm in hydra_utils.read_logs.
loaded_data = read_logs("results", [read_performance, read_emissions], [drop_columns], "run_data")
# Show only the first rows instead of dumping the whole frame.
loaded_data.head()