# Export results to CSV

In [20]:
import csv
import os
import os.path as osp
import re

import pandas as pd
import yaml

### Get logdirs

In [28]:
def list_logdirs(log_root: str, maxdeep: int) -> list[str]:
	"""Collect the directories found `maxdeep` levels below `log_root`.

	The tree is walked depth-first. A directory is collected (and not
	descended into) as soon as its depth reaches `maxdeep`. Regular files, and
	branches that end before that depth, contribute nothing. Entries are
	visited in sorted name order so that the result does not depend on the
	arbitrary order returned by `os.listdir`.

	Args:
		log_root: Path where the walk starts (depth 0).
		maxdeep: Depth at which directories are collected. With -1 the depth
			check is disabled, hence no directory is ever collected and the
			result is always empty.
			NOTE(review): confirm that this is the intended meaning of -1.

	Returns:
		The collected directory paths, in depth-first, name-sorted order.
		Empty when `log_root` is not a directory.
	"""
	def collect(path: str, deep: int) -> list[str]:
		# Regular files and missing paths are never reported.
		if not osp.isdir(path):
			return []
		if maxdeep != -1 and deep >= maxdeep:
			return [path]
		found: list[str] = []
		# Sorted iteration makes the output reproducible.
		for name in sorted(os.listdir(path)):
			found.extend(collect(osp.join(path, name), deep + 1))
		return found

	return collect(log_root, 0)

In [29]:
# Root folder of the training logs. The SSLH_LOG_ROOT environment variable, when
# set, overrides the default so the notebook is not tied to one machine.
log_root = os.environ.get("SSLH_LOG_ROOT", "/users/samova/elabbe/root_sslh/SSLH/logs")
# Depth below log_root at which the log directories are collected.
logdir_depth = 3
all_logdirs = list_logdirs(log_root, logdir_depth)
print(f"{log_root=}")
print(f"{len(all_logdirs)=}")

log_root='/users/samova/elabbe/root_sslh/SSLH/logs'
len(all_logdirs)=113


### Filter logdirs

In [30]:
# Candidate regular expressions; only the last entry is the one in effect.
candidate_patterns = (
	".*22116.*esc10.*",
	".*22116.*gsc.*",
	".*_21932__data_sup_gsc__.*",
	".*_32178__data_sup_gsc__.*",
)
pattern = candidate_patterns[-1]

# The match is anchored at the beginning of each path.
path_matcher = re.compile(pattern)
logdirs = list(filter(path_matcher.match, all_logdirs))
print(f"{pattern=}")
print(f"{len(logdirs)=}")

pattern='.*_32178__data_sup_gsc__.*'
len(logdirs)=15


### Read results

In [31]:
def flat_dict(x) -> dict:
	"""Flatten a nested structure into a single-level dict with dotted keys.

	Lists and tuples are first turned into dicts keyed by element position, so
	that `pd.json_normalize` (with "." as separator) can flatten them like
	any other nested dict. The first, and only, record of the normalized frame
	is returned.
	"""
	def sequences_to_dicts(node):
		# Recursively replace every list/tuple by a {position: element} dict.
		if isinstance(node, dict):
			return {key: sequences_to_dicts(value) for key, value in node.items()}
		if isinstance(node, (list, tuple)):
			return dict(enumerate(map(sequences_to_dicts, node)))
		return node

	nested = sequences_to_dicts(x)
	frame = pd.json_normalize(nested, sep=".")
	records = frame.to_dict(orient='records')
	return records[0]

In [32]:
# Keys matching one of these patterns are dropped from the results.
excluded_values = [".*hp_metric"]
# Columns are laid out by the first pattern they match, in this order.
column_order_patterns = [".*tag", "metrics.*acc", ".*val_folds", "metrics.*duration", "metrics.*", "hparams.seed", ".*"]
# Columns used, in this order, to sort the rows (missing ones are ignored).
line_order = ["hparams.tag", "hparams.data.dm.val_folds.0"]

# Compiled once: the same patterns are applied to every key of every logdir.
excluded_regexes = [re.compile(p) for p in excluded_values]
column_order_regexes = [re.compile(p) for p in column_order_patterns]

# (key prefix, file name) of the YAML files read from each logdir.
files = [
	("metrics", "metrics.yaml"),
	("hparams", "hparams.yaml"),
]

results_list = []
for logdir in logdirs:
	results = {}

	for prefix, fname in files:
		fpath = osp.join(logdir, fname)
		if not osp.isfile(fpath):
			print(f"Cannot find {fname} in {osp.basename(logdir)}")
			# A logdir with a missing file is skipped entirely (see the else below).
			break
		with open(fpath, "r", encoding="utf-8") as file:
			file_results = yaml.safe_load(file)

		# Flatten, prefix each key with its origin, and drop the excluded keys.
		for key, value in flat_dict(file_results).items():
			key = ".".join([prefix, key])
			if not any(regex.match(key) for regex in excluded_regexes):
				results[key] = value
	else:
		# Only reached when every file was found: order the columns by the
		# first pattern each key matches, then keep the row.
		results_ordered = {}
		for regex in column_order_regexes:
			for key, value in results.items():
				if key not in results_ordered and regex.match(key):
					results_ordered[key] = value
		results_list.append(results_ordered)

df = pd.DataFrame(results_list)
sort_keys = [k for k in line_order if k in df.columns]
if sort_keys:
	# Guarded so that an empty table, or one without the sort columns, is left as is.
	df = df.sort_values(sort_keys, ascending=True)
df = df.reset_index(drop=True)
df.head(len(df))

Unnamed: 0,hparams.tag,metrics.test_best/acc,metrics.val_best/acc,metrics.fit_duration_h,metrics.test_duration_h,metrics.test_best/ce,metrics.val_best/ce,hparams.seed,hparams.cpus,hparams.datetime,...,hparams.train_aug.1.aug.align,hparams.train_aug.1.aug.p,hparams.train_aug.2.type,hparams.train_aug.2.aug._target_,hparams.train_aug.2.aug.freq_scales.0,hparams.train_aug.2.aug.freq_scales.1,hparams.train_aug.2.aug.time_scales.0,hparams.train_aug.2.aug.time_scales.1,hparams.train_aug.2.aug.fill_value,hparams.train_aug.2.aug.p
0,32178__data_sup_gsc__pl_mixup_mix_label__epoch...,0.964106,0.965134,0.001907,0.001906,0.169795,0.179427,1234,5,2022-06-08_18-18-05,...,,,,,,,,,,
1,32178__data_sup_gsc__pl_mixup_mix_label__epoch...,0.966468,0.967038,0.0019,0.001899,0.170581,0.180529,1234,5,2022-06-08_18-18-07,...,,,,,,,,,,
2,32178__data_sup_gsc__pl_mixup_mix_label__epoch...,0.966652,0.968242,0.001876,0.001874,0.160178,0.165293,1234,5,2022-06-08_18-18-08,...,,,,,,,,,,
3,32178__data_sup_gsc__pl_mixup_mix_label__epoch...,0.965105,0.965936,0.002012,0.002011,0.15967,0.170144,1234,5,2022-06-08_18-18-08,...,,,,,,,,,,
4,32178__data_sup_gsc__pl_mixup_mix_label__epoch...,0.964107,0.966142,0.00192,0.001919,0.174237,0.175547,1234,5,2022-06-08_18-18-09,...,,,,,,,,,,
5,32178__data_sup_gsc__pl_mixup_mix_label__epoch...,0.965018,0.969243,0.001879,0.001877,0.152792,0.151393,1234,5,2022-06-08_18-18-10,...,,,,,,,,,,
6,32178__data_sup_gsc__pl_mixup_mix_label__epoch...,0.967104,0.966234,0.001863,0.001861,0.157718,0.174585,1234,5,2022-06-08_18-18-11,...,,,,,,,,,,
7,32178__data_sup_gsc__pl_mixup_mix_label__epoch...,0.965383,0.967639,0.001904,0.001903,0.171396,0.168376,1234,5,2022-06-08_18-18-11,...,,,,,,,,,,
8,32178__data_sup_gsc__pl_mixup_mix_label__epoch...,0.965018,0.966533,0.001851,0.001849,0.155636,0.162267,1234,5,2022-06-08_18-18-12,...,,,,,,,,,,
9,32178__data_sup_gsc__pl_mixup_mix_label__epoch...,0.967019,0.967843,0.001697,0.001696,0.160057,0.167769,1234,5,2022-06-08_18-18-13,...,,,,,,,,,,


### Export to csv

In [35]:
def export_to_csv(df: pd.DataFrame, fpath: str) -> None:
	"""Write the rows of `df` to the CSV file `fpath` and print a summary.

	The header and the column order follow the columns of `df`; the index is
	not written. An existing file at `fpath` is overwritten.

	Args:
		df: Table to export.
		fpath: Destination path of the CSV file.
	"""
	lst_dic = df.to_dict("records")
	keys = list(df.keys())

	# newline="" is required by the csv module: without it the "\r\n" row
	# terminator is translated again on platforms that use "\r\n" line endings.
	with open(fpath, "w", newline="") as file:
		writer = csv.DictWriter(file, fieldnames=keys)
		writer.writeheader()
		writer.writerows(lst_dic)
	print(f"Export {len(df)} results in {fpath=}")

In [37]:
# Write the result table to a CSV file, relative to the working directory.
export_to_csv(df=df, fpath="results_sslh.ign.csv")

Export 15 results in fpath='results_sslh.ign.csv'
