# Export results to CSV

In [11]:
import csv
import os
import os.path as osp
import re

from typing import Any, Iterable, Union

import pandas as pd
import yaml

### Get logdirs

In [12]:
def list_logdirs(log_root: str, maxdeep: int) -> list[str]:
	"""Collect the directories located exactly ``maxdeep`` levels below ``log_root``.

	``log_root`` itself is level 0. Regular files are never reported, and a
	directory whose branch ends before level ``maxdeep`` is not reported either.
	Entries are visited depth-first, in the order given by ``os.listdir``.

	NOTE(review): with ``maxdeep == -1`` the depth cutoff never triggers, so the
	whole tree is walked and an empty list is returned - confirm whether -1 was
	meant to be a usable "no limit" value.

	:param log_root: Directory where the walk starts.
	:param maxdeep: Level at which directories are collected.
	:returns: The collected directory paths, each built with ``osp.join``.
	"""
	collected: list[str] = []

	def walk(current: str, level: int) -> None:
		# Anything that is not a directory contributes nothing.
		if not osp.isdir(current):
			return
		# Target level reached: keep this directory and stop descending.
		if maxdeep != -1 and level >= maxdeep:
			collected.append(current)
			return
		for entry in os.listdir(current):
			walk(osp.join(current, entry), level + 1)

	walk(log_root, 0)
	return collected

In [13]:
def filter_logdirs(
    logdirs: Iterable[str],
    logdirs_include_patterns: Union[str, Iterable[str]]
) -> list[str]:
    """Keep the log directories whose path matches at least one pattern.

    Matching uses ``re.match`` semantics: a pattern has to match from the
    beginning of the path, but does not have to consume it entirely.

    :param logdirs: Candidate paths; their relative order is preserved.
    :param logdirs_include_patterns: A single regular expression or an
        iterable of regular expressions.
    :returns: The paths accepted by at least one pattern.
    """
    if isinstance(logdirs_include_patterns, str):
        raw_patterns = [logdirs_include_patterns]
    else:
        raw_patterns = logdirs_include_patterns
    compiled = [re.compile(raw) for raw in raw_patterns]

    kept = []
    for candidate in logdirs:
        for regex in compiled:
            if regex.match(candidate):
                kept.append(candidate)
                # One accepting pattern is enough for this path.
                break
    return kept

In [26]:
# Root directory under which the log directories are searched.
# Alternative roots that were previously assigned here and immediately
# overwritten (only the last assignment ever took effect); uncomment one to use it:
# log_root = "/users/samova/elabbe/root_sslh/SSLH/calmip_logs"
# log_root = "/users/samova/elabbe/root_sslh/SSLH/logs"
log_root = "/homelocal/labbeti/Desktop/root_aac/OSI_SYNC/root_sslh/SSLH/logs"

# Collect the directories located 3 levels below log_root.
all_logdirs = list_logdirs(log_root, 3)
print(f"{log_root=}")
print(f"{len(all_logdirs)=}")
# Preview of the first five collected paths.
print(yaml.dump(all_logdirs[:5], sort_keys=False))

log_root='/homelocal/labbeti/Desktop/root_aac/OSI_SYNC/root_sslh/SSLH/logs'
len(all_logdirs)=333
- /homelocal/labbeti/Desktop/root_aac/OSI_SYNC/root_sslh/SSLH/logs/ssl_ubs8k/remixmatch/2022-06-08_10-37-44_NOTAG
- /homelocal/labbeti/Desktop/root_aac/OSI_SYNC/root_sslh/SSLH/logs/ssl_ubs8k/remixmatch/2022-06-07_18-42-08_22116__data_ssl_ubs8k__pl_remixmatch__epochs_200__rot_size_4__self_transform_mode_hvflips__val_fold_2
- /homelocal/labbeti/Desktop/root_aac/OSI_SYNC/root_sslh/SSLH/logs/ssl_ubs8k/remixmatch/2022-06-07_18-42-06_22116__data_ssl_ubs8k__pl_remixmatch__epochs_200__rot_size_4__self_transform_mode_hvflips__val_fold_1
- /homelocal/labbeti/Desktop/root_aac/OSI_SYNC/root_sslh/SSLH/logs/ssl_ubs8k/remixmatch/2022-06-07_18-42-06_22116__data_ssl_ubs8k__pl_remixmatch__epochs_200__rot_size_2__self_transform_mode_hflips__val_fold_1
- /homelocal/labbeti/Desktop/root_aac/OSI_SYNC/root_sslh/SSLH/logs/ssl_ubs8k/remixmatch/2022-06-08_16-05-14_NOTAG



### Filter logdirs

In [27]:
# Regular expression selecting the log directories to export; it is matched
# from the beginning of each path (re.match semantics in filter_logdirs).
# Earlier selections that were assigned here and immediately overwritten
# (only the last assignment ever took effect); uncomment one to use it:
# pattern = ".*22116.*esc10.*"
# pattern = ".*22116.*gsc.*"
# pattern = ".*_21932__data_sup_gsc__.*"
# pattern = ".*_32178__data_sup_gsc__.*"
# pattern = ".*_18013_.*"
# pattern = ".*(_20589_|_12670_).*"
# pattern = ".*_20589__data_sup_gsc__.*"
# pattern = ".*18013__data_ssl_gsc__pl_remixmatch_.*"
pattern = ".*(R18|R19)-.*"

logdirs = filter_logdirs(all_logdirs, pattern)
print(f"{pattern=}")
print(f"{len(logdirs)=}")

pattern='.*(R18|R19)-.*'
len(logdirs)=15


### Read results

In [28]:
def flat_dict(x) -> dict:
	"""Flatten a nested structure into a single-level dictionary.

	Lists and tuples are first turned into dictionaries keyed by element
	position, then the nested dictionaries are flattened by
	``pd.json_normalize`` with ``"."`` as key separator, e.g.
	``{"a": {"b": [1, 2]}}`` gives ``{"a.b.0": 1, "a.b.1": 2}``.

	:param x: Nested combination of dicts, lists, tuples and leaf values.
	:returns: The first (and only expected) record produced by the normalization.
	"""
	def sequences_to_dicts(node) -> Any:
		# Replace every list/tuple by a dict indexed by position, recursively.
		if isinstance(node, (list, tuple)):
			return {idx: sequences_to_dicts(item) for idx, item in enumerate(node)}
		if isinstance(node, dict):
			return {key: sequences_to_dicts(value) for key, value in node.items()}
		return node

	nested = sequences_to_dicts(x)
	records = pd.json_normalize(nested, sep=".").to_dict(orient='records')
	return records[0]

In [29]:
# Flattened keys matching one of these regular expressions (re.match, i.e.
# anchored at the start of the key) are left out of the table.
excluded_values = [".*hp_metric", "hp.slurm.output", "hp.slurm.error"]
# Column layout: patterns are tried in order and each key is placed by the
# first pattern that matches it; the trailing ".*" collects every remaining key.
column_order_patterns = [".*tag", "met.*acc", ".*val_folds", "met.*duration", "met.*", "hp.seed", ".*"]
# Columns used to sort the rows (columns absent from the table are ignored).
line_order = ["hp.tag", "hp.data.dm.val_folds.0"]

# (key prefix, file name) pairs read from every log directory.
files = [
	("met", "metrics.yaml"),
	("hp", "hparams.yaml"),
]

results_list = []
for logdir in logdirs:
	results = {}

	for prefix, fname in files:
		fpath = osp.join(logdir, fname)
		if not osp.isfile(fpath):
			print(f"Cannot find {fname} in {osp.basename(logdir)}")
			# Incomplete run: leaving the loop with break skips the else branch below.
			break
		# Explicit encoding so that reading does not depend on the platform default.
		with open(fpath, "r", encoding="utf-8") as file:
			file_results = yaml.safe_load(file)

		file_results = flat_dict(file_results)
		file_results = {".".join([prefix, k]): v for k, v in file_results.items()}
		file_results = {
			k: v for k, v in file_results.items()
			if not any(re.match(p, k) for p in excluded_values)
		}

		results |= file_results
	else:
		# Reached only when every file of this log directory was found and read.
		results_ordered = {}
		for p in column_order_patterns:
			results_ordered |= {k: v for k, v in results.items() if k not in results_ordered and re.match(p, k)}
		results_list.append(results_ordered)

df = pd.DataFrame(results_list)
# Sort the rows and renumber them from 0 without mutating a frame in place.
df = df.sort_values([k for k in line_order if k in df.columns], ascending=True).reset_index(drop=True)
df.head(len(df))

Cannot find metrics.yaml in 2023-02-02_10-27-52_R18-data_ssl_esc10-pl_mean_teacher_mixup-bsizes_30_30-buffer_none-stu_weak-val_fold_4
Cannot find metrics.yaml in 2023-02-02_10-27-50_R18-data_ssl_esc10-pl_mean_teacher_mixup-bsizes_30_30-buffer_none-stu_weak-val_fold_2
Cannot find metrics.yaml in 2023-02-02_10-27-54_R18-data_ssl_esc10-pl_mean_teacher_mixup-bsizes_30_30-buffer_none-stu_weak-val_fold_5
Cannot find metrics.yaml in 2023-02-02_10-27-51_R18-data_ssl_esc10-pl_mean_teacher_mixup-bsizes_30_30-buffer_none-stu_weak-val_fold_3
Cannot find metrics.yaml in 2023-02-02_10-27-49_R18-data_ssl_esc10-pl_mean_teacher_mixup-bsizes_30_30-buffer_none-stu_weak-val_fold_1


Unnamed: 0,hp.tag,met.val_best/acc,hp.data.dm.val_folds.0,met.other/fit_duration_h,met.other/test_duration_h,met.val_best/ce,hp.seed,hp.cpus,hp.datetime,hp.debug,...,hp.lambda_u,hp.threshold,hp.aug_s.0.type,hp.aug_s.0.aug._target_,hp.aug_u.0.type,hp.aug_u.0.aug._target_,hp.pl.criterion_u._target_,hp.pl.criterion_u.reduction,hp.pl.lambda_u,hp.pl.threshold
0,R18-data_ssl_esc10-pl_pseudo_labeling_mixup-bs...,0.633333,1,0.670326,0.000327,1.507369,1234,12,2023-02-02_10-27-44,False,...,1.0,0.0,spectrogram,torch.nn.Identity,spectrogram,torch.nn.Identity,sslh.nn.loss.CrossEntropyLossVecTargets,none,1.0,0.0
1,R18-data_ssl_esc10-pl_pseudo_labeling_mixup-bs...,0.591667,2,0.662918,0.000322,1.576136,1234,12,2023-02-02_10-27-45,False,...,1.0,0.0,spectrogram,torch.nn.Identity,spectrogram,torch.nn.Identity,sslh.nn.loss.CrossEntropyLossVecTargets,none,1.0,0.0
2,R18-data_ssl_esc10-pl_pseudo_labeling_mixup-bs...,0.641667,3,0.701033,0.000343,1.509107,1234,12,2023-02-02_10-27-46,False,...,1.0,0.0,spectrogram,torch.nn.Identity,spectrogram,torch.nn.Identity,sslh.nn.loss.CrossEntropyLossVecTargets,none,1.0,0.0
3,R18-data_ssl_esc10-pl_pseudo_labeling_mixup-bs...,0.7,4,0.637857,0.00031,1.062285,1234,12,2023-02-02_10-27-47,False,...,1.0,0.0,spectrogram,torch.nn.Identity,spectrogram,torch.nn.Identity,sslh.nn.loss.CrossEntropyLossVecTargets,none,1.0,0.0
4,R18-data_ssl_esc10-pl_pseudo_labeling_mixup-bs...,0.633333,5,0.635577,0.000311,1.452418,1234,12,2023-02-02_10-27-48,False,...,1.0,0.0,spectrogram,torch.nn.Identity,spectrogram,torch.nn.Identity,sslh.nn.loss.CrossEntropyLossVecTargets,none,1.0,0.0
5,R19-data_ssl_esc10-pl_mean_teacher_mixup-bsize...,0.658333,1,1.081298,0.000356,1.0694,1234,12,2023-02-02_13-32-15,False,...,,,,,,,,,,
6,R19-data_ssl_esc10-pl_mean_teacher_mixup-bsize...,0.725,2,1.08148,0.000354,1.039509,1234,12,2023-02-02_13-32-17,False,...,,,,,,,,,,
7,R19-data_ssl_esc10-pl_mean_teacher_mixup-bsize...,0.633333,3,0.989627,0.000327,1.204974,1234,12,2023-02-02_13-32-18,False,...,,,,,,,,,,
8,R19-data_ssl_esc10-pl_mean_teacher_mixup-bsize...,0.766667,4,1.0018,0.000348,0.898422,1234,12,2023-02-02_13-32-19,False,...,,,,,,,,,,
9,R19-data_ssl_esc10-pl_mean_teacher_mixup-bsize...,0.725,5,0.99707,0.000334,0.939944,1234,12,2023-02-02_13-32-20,False,...,,,,,,,,,,


### Export to CSV

In [30]:
def export_to_csv(df: pd.DataFrame, fpath: str) -> None:
	"""Write ``df`` to ``fpath`` as a CSV file with a header row.

	The index is not written. The destination directory is created when it
	does not exist yet, and a one-line summary is printed once the file is
	written.

	:param df: Table to export; its columns become the CSV field names.
	:param fpath: Path of the CSV file to create or overwrite.
	"""
	lst_dic = df.to_dict("records")
	keys = list(df.keys())

	# Make sure the destination directory exists (a bare file name has none).
	dpath = osp.dirname(fpath)
	if dpath:
		os.makedirs(dpath, exist_ok=True)

	# newline="" is required by the csv module: without it, platforms that
	# translate "\n" produce "\r\r\n" line endings, i.e. blank rows.
	with open(fpath, "w", newline="", encoding="utf-8") as file:
		writer = csv.DictWriter(file, fieldnames=keys)
		writer.writeheader()
		writer.writerows(lst_dic)
	print(f"Export {len(df)} results in {fpath=}")

In [31]:
# Write the results table to a CSV file; the path is relative to the current working directory.
export_to_csv(df, "nb_data/results_sslh.ign.csv")

Export 10 results in fpath='nb_data/results_sslh.ign.csv'
