In [None]:
import os
import glob
import shutil
import csv
import yaml
import re
# Directory that holds the sweep-set CSVs together with the run folders and valX folders
# that the cells below reorganise.
# NOTE(review): hard-coded absolute path; adjust it before running on another machine.
root_dir = '/home/wandb-runs/pace-v2/copied-dirs-for-real-only'

# Cleaning old erroneous folder formatting 

In [None]:
## The folder above contains multiple CSVs, each corresponding to a single WandB sweep set.
## Parse the first column of every CSV (the run names belonging to that sweep set) into a
## dictionary keyed by the CSV filename without its extension (aka the sweep set name).
sweep_dict = {}
# sorted() makes the dictionary order deterministic; glob order is unspecified.
for csv_file in sorted(glob.glob(os.path.join(root_dir, '*.csv'))):
    # splitext keeps dots that are part of the name: 'set.v2.csv' -> 'set.v2', not 'set'.
    sweep_set_name = os.path.splitext(os.path.basename(csv_file))[0]
    # newline='' is what the csv module expects so quoted fields with embedded newlines parse correctly.
    with open(csv_file, 'r', newline='') as f:
        reader = csv.reader(f)
        next(reader, None)  # skip header; the default avoids StopIteration on an empty file
        # Ignore blank lines and rows whose first cell is empty.
        run_names = [row[0] for row in reader if row and row[0].strip()]
    sweep_dict[sweep_set_name] = run_names

In [None]:
# Display the parsed {sweep set name: [run names]} mapping to sanity-check it before any folders are moved.
sweep_dict

In [None]:
## For each sweep set, move every folder in root_dir whose name matches one of its run names
## into a folder named after the sweep set (created on demand).
for sweep_set, run_names in sweep_dict.items():
    sweep_dir = os.path.join(root_dir, sweep_set)
    os.makedirs(sweep_dir, exist_ok=True)
    for run_name in run_names:
        # An empty name would make os.path.join resolve to root_dir itself.
        if not run_name:
            continue
        run_dir = os.path.join(root_dir, run_name)
        # Only directories are run folders; this also skips runs that were already moved.
        if not os.path.isdir(run_dir):
            continue
        # A run named like its sweep set would otherwise be moved into itself.
        if os.path.abspath(run_dir) == os.path.abspath(sweep_dir):
            continue
        # shutil.move raises if the destination already exists (e.g. after an interrupted run).
        destination = os.path.join(sweep_dir, os.path.basename(run_dir))
        if os.path.exists(destination):
            print(f'Skipping {run_dir}: {destination} already exists')
            continue
        shutil.move(run_dir, sweep_dir)

In [None]:
## There are folders of the form valX in root_dir. Each one contains a file called simple_evaluation_results.yaml.
## In the yaml file, the 'model' entry contains the sweep name
## (e.g: model: pace-v2/rare-sweep-8/weights/best.pt contains rare-sweep-8).
## Move each val folder into the sweep set directory whose run list contains that sweep name.
for val_folder in glob.glob(os.path.join(root_dir, 'val*')):
    yaml_file = os.path.join(val_folder, 'simple_evaluation_results.yaml')
    if not os.path.exists(yaml_file):
        continue
    # Read the file and close it before the folder that contains it is moved.
    with open(yaml_file, 'r') as f:
        yaml_content = yaml.safe_load(f)

    # safe_load returns None for an empty file, and 'model' may be missing or not a string.
    model_path = yaml_content.get('model') if isinstance(yaml_content, dict) else None
    path_parts = model_path.split('/') if isinstance(model_path, str) else []
    # The sweep name is the third path component from the end (see the example above).
    if len(path_parts) < 3:
        print(f'Skipping {val_folder}: cannot extract a sweep name from model={model_path!r}')
        continue
    sweep_name = path_parts[-3]
    print(f'Moving {val_folder} to {sweep_name}??')

    ## find the sweep set corresponding to the given sweep name
    search_results = [s for s, names in sweep_dict.items() if sweep_name in names]
    assert len(search_results) == 1, f'Expected 1 sweep set for {sweep_name}, found {len(search_results)}'
    sweep_set = search_results[0]
    target_dir = os.path.join(root_dir, sweep_set)
    # Without an existing target directory shutil.move would rename the val folder to
    # target_dir instead of placing it inside.
    os.makedirs(target_dir, exist_ok=True)
    # shutil.move raises if the destination already exists (e.g. after an interrupted run).
    destination = os.path.join(target_dir, os.path.basename(val_folder))
    if os.path.exists(destination):
        print(f'Skipping {val_folder}: {destination} already exists')
        continue
    shutil.move(val_folder, target_dir)

In [None]:
# For every sweep set, report how many entries of its directory are named like sweep runs
# ([word]-[word]-[number]) and how many are named like val folders (val[number]).
for sweep_set in sweep_dict:
    sweep_dir = os.path.join(root_dir, sweep_set)
    # Both counts stay at zero when the sweep set directory is missing.
    num_sweep_dirs = 0
    num_val_dirs = 0
    if os.path.exists(sweep_dir):
        entries = os.listdir(sweep_dir)
        num_sweep_dirs = sum(1 for entry in entries if re.match(r'^[\w-]+-[\w-]+-\d+$', entry))
        num_val_dirs = sum(1 for entry in entries if re.match(r'^val\d+', entry))
    print(f'Sweep set: {sweep_set}, Number of sweep dirs: {num_sweep_dirs}, Number of val dirs: {num_val_dirs}')

In [None]:
# NOTE(review): `covered_runs` is not assigned in any cell visible in this part of the notebook;
# on a fresh kernel this cell raises NameError unless it is defined elsewhere. Confirm, or remove the cell.
covered_runs