# Export results to CSV

In [1]:
import csv
import os
import os.path as osp
import re

from typing import Any, Iterable, Union

import pandas as pd
import yaml

### Get logdirs

In [11]:
def list_logdirs(log_root: str, maxdeep: int) -> list[str]:
	def list_files_rec(path: str, deep: int) -> list[tuple[str, int]]:
		if osp.isdir(path):
			if maxdeep != -1 and deep >= maxdeep:
				return [(path, deep)]
			else:
				return [(subpath, subdeep) for name in os.listdir(path) for subpath, subdeep in list_files_rec(osp.join(path, name), deep+1)]
		else:
			return []
	files_with_deep = list_files_rec(log_root, 0)
	return [path for path, _ in files_with_deep]

In [12]:
def filter_logdirs(
    logdirs: Iterable[str],
    logdirs_include_patterns: Union[str, Iterable[str]]
) -> list[str]:
    if isinstance(logdirs_include_patterns, str):
        logdirs_include_patterns = [logdirs_include_patterns]
    logdirs_include_patterns = list(map(re.compile, logdirs_include_patterns))  # type: ignore
    included_logdirs = [
        logdir
        for logdir in logdirs
        if any(re.match(pattern, logdir) for pattern in logdirs_include_patterns)
    ]
    return included_logdirs

In [13]:
log_root = "/users/samova/elabbe/root_sslh/SSLH/calmip_logs"
log_root = "/users/samova/elabbe/root_sslh/SSLH/logs"
log_root = "/homelocal/labbeti/Desktop/root_aac/OSI_SYNC/root_sslh/SSLH/logs"
all_logdirs = list_logdirs(log_root, 3)
print(f"{log_root=}")
print(f"{len(all_logdirs)=}")
print(f"{yaml.dump(all_logdirs[:5], sort_keys=False)}")

log_root='/homelocal/labbeti/Desktop/root_aac/OSI_SYNC/root_sslh/SSLH/logs'
len(all_logdirs)=433
- /homelocal/labbeti/Desktop/root_aac/OSI_SYNC/root_sslh/SSLH/logs/ssl_ubs8k/mean_teacher_mixup/2023-02-02_16-49-59_R20-data_ssl_ubs8k-pl_mean_teacher_mixup-bsizes_128_128--val_fold_6
- /homelocal/labbeti/Desktop/root_aac/OSI_SYNC/root_sslh/SSLH/logs/ssl_ubs8k/mean_teacher_mixup/2023-02-02_16-49-58_R20-data_ssl_ubs8k-pl_mean_teacher_mixup-bsizes_128_128--val_fold_5
- /homelocal/labbeti/Desktop/root_aac/OSI_SYNC/root_sslh/SSLH/logs/ssl_ubs8k/mean_teacher_mixup/2023-02-02_16-50-02_R20-data_ssl_ubs8k-pl_mean_teacher_mixup-bsizes_128_128--val_fold_8
- /homelocal/labbeti/Desktop/root_aac/OSI_SYNC/root_sslh/SSLH/logs/ssl_ubs8k/mean_teacher_mixup/2023-02-02_16-50-04_R20-data_ssl_ubs8k-pl_mean_teacher_mixup-bsizes_128_128--val_fold_10
- /homelocal/labbeti/Desktop/root_aac/OSI_SYNC/root_sslh/SSLH/logs/ssl_ubs8k/mean_teacher_mixup/2023-02-02_16-49-57_R20-data_ssl_ubs8k-pl_mean_teacher_mixup-bsizes_12

### Filter logdirs

In [14]:
pattern = ".*(R22)-.*"

logdirs = filter_logdirs(all_logdirs, pattern)
print(f"{pattern=}")
print(f"{len(logdirs)=}")

pattern='.*(R22)-.*'
len(logdirs)=4


### Read results

In [15]:
def flat_dict(x) -> dict:
	def flat_lst(x) -> Any:
		if isinstance(x, dict):
			return {k: flat_lst(v) for k, v in x.items()}
		elif isinstance(x, (list, tuple)):
			return {i: flat_lst(v) for i, v in enumerate(x)}	
		else:
			return x

	x = flat_lst(x)
	x = pd.json_normalize(x, sep=".").to_dict(orient='records')[0]
	return x

In [16]:
excluded_values = [".*hp_metric", "hp.slurm.output", "hp.slurm.error"]
column_order_patterns = [".*tag", "met.*acc", ".*val_folds", "met.*duration", "met.*", "hp.seed", ".*"]
line_order = ["hp.tag", "hp.data.dm.val_folds.0"]

results_list = []
for logdir in logdirs:
	results = {}
	skip = False
	files = [
		("met", "metrics.yaml"),
		("hp", "hparams.yaml"),
	]
	
	for prefix, fname in files:
		fpath = osp.join(logdir, fname)
		if not osp.isfile(fpath):
			print(f"Cannot find {fname} in {osp.basename(logdir)}")
			skip = True
			break
		with open(fpath, "r") as file:
			file_results = yaml.safe_load(file)
		
		file_results = flat_dict(file_results)
		file_results = {".".join([prefix, k]): v for k, v in file_results.items()}
		file_results = {
			k: v for k, v in file_results.items()
			if not any(re.match(p, k) for p in excluded_values)
		}
		
		results |= file_results

	if skip:
		continue

	results_ordered = {}
	for p in column_order_patterns:
		results_ordered |= {k: v for k, v in results.items() if k not in results_ordered and re.match(p, k)}
	results_list.append(results_ordered)

df = pd.DataFrame(results_list)
df.sort_values([k for k in line_order if k in df.keys()], ascending=True, inplace=True)
df.index = list(range(len(df)))
df.head(len(df))

Unnamed: 0,hp.tag,met.val_best/acc,hp.data.dm.val_folds.0,met.other/fit_duration_h,met.other/test_duration_h,met.val_best/ce,hp.seed,hp.datetime,hp.debug,hp.epochs,...,hp.strong_aug.2.aug.fill_value,hp.strong_aug.2.aug.p,hp.pl.threshold,hp.criterion,hp.train_aug.0.type,hp.train_aug.0.aug._target_,hp.data.dm.bsize,hp.data.dm.ratio,hp.pl.criterion._target_,hp.pl.criterion.reduction
0,R22-data_-pl_fixmatch-epochs_10-bsizes_128_128...,0.703971,1,0.498648,0.003983,-0.673017,1234,2023-02-22_15-45-01,False,10,...,-80.0,1.0,0.95,,,,,,,
1,R22-data_-pl_mixmatch-epochs_10-bsizes_128_128...,0.697619,1,0.497031,0.003801,-0.521554,1234,2023-02-22_15-45-00,False,10,...,,,,,,,,,,
2,R22-data_-pl_supervised-epochs_10-bsize_128-SU...,0.637436,1,0.03664,0.004307,-0.489522,1234,2023-02-22_15-44-57,False,10,...,,,,CrossEntropyLossVecTargets,spectrogram,torch.nn.Identity,128.0,0.1,sslh.nn.loss.CrossEntropyLossVecTargets,mean
3,R22-data_-pl_supervised-epochs_10-bsize_128-SU...,0.722024,1,0.26412,0.004179,-0.688925,1234,2023-02-22_15-44-59,False,10,...,,,,CrossEntropyLossVecTargets,spectrogram,torch.nn.Identity,128.0,1.0,sslh.nn.loss.CrossEntropyLossVecTargets,mean


### Export to csv

In [17]:
def export_to_csv(df: pd.DataFrame, fpath: str) -> None:
	lst_dic = df.to_dict("records")
	keys = list(df.keys())

	with open(fpath, "w") as file:
		writer = csv.DictWriter(file, fieldnames=keys)
		writer.writeheader()
		writer.writerows(lst_dic)
	print(f"Export {len(df)} results in {fpath=}")

In [18]:
export_to_csv(df, "nb_data/results_sslh.ign.csv")

Export 4 results in fpath='nb_data/results_sslh.ign.csv'
