In [1]:
import os

def find_directories_with_hydra_and_results(root_dir):
    """Scan ``root_dir`` and classify every directory that has a ``.hydra`` subdirectory.

    Args:
        root_dir: Root of the directory tree, traversed with ``os.walk``.

    Returns:
        A ``(with_results, without_results)`` tuple of lists of directory paths.
        The first list holds the directories that also contain a file named
        ``results.yaml``; the second holds those that do not. Both lists are in
        ``os.walk`` traversal order.
    """
    completed, pending = [], []

    for current_dir, subdirs, files in os.walk(root_dir):
        # Directories without a .hydra subdirectory are ignored entirely.
        if '.hydra' not in subdirs:
            continue
        bucket = completed if 'results.yaml' in files else pending
        bucket.append(current_dir)

    return completed, pending

def strip_basename(paths):
    """Return the parent directory of each path (its last component removed), in input order."""
    parents = []
    for entry in paths:
        parents.append(os.path.dirname(entry))
    return parents

root_directory = '/home/foresti/minde/minde/evaluate/outputs/2025-03-03/minde.minde.MINDE'  # Replace with your root directory path
with_results, without_results = find_directories_with_hydra_and_results(root_directory)

# Strip the basename from all paths, so each entry becomes the parent of the
# directory that holds .hydra (several entries may collapse to the same parent).
with_results = strip_basename(with_results)
without_results = strip_basename(without_results)


# Remove from without_results all the paths that are also in with_results.
# A set makes each membership check O(1) instead of scanning the whole list.
completed_parents = set(with_results)
without_results = [path for path in without_results if path not in completed_parents]

print("Directories with .hydra and results.yaml:")
for path in with_results:
    print(path)

# Group the completed paths by their last path component.
with_results_by_mutinfo = {}

for path in with_results:
    mutinfo = os.path.basename(path)
    # setdefault creates the list on first sight AND appends to it; the
    # previous if/else created the list but dropped the first path of each group.
    with_results_by_mutinfo.setdefault(mutinfo, []).append(path)

print("\nDirectories with .hydra and results.yaml grouped by mutinfo:")
for mutinfo, paths in with_results_by_mutinfo.items():
    print(mutinfo)
    for path in paths:
        print(path)

print("\nDirectories with .hydra but without results.yaml:")
for path in without_results:
    print(path)

Directories with .hydra and results.yaml:
/home/foresti/minde/minde/evaluate/outputs/2025-03-03/minde.minde.MINDE/mutinfo.distributions.base.CorrelatedStudent/model=mlp/type=c/X_dim=1__Y_dim=1/dof=2/N=100/n_runs=10/target_mutinfo=9.0
/home/foresti/minde/minde/evaluate/outputs/2025-03-03/minde.minde.MINDE/mutinfo.distributions.base.CorrelatedStudent/model=mlp/type=c/X_dim=1__Y_dim=1/dof=2/N=100/n_runs=10/target_mutinfo=10.0
/home/foresti/minde/minde/evaluate/outputs/2025-03-03/minde.minde.MINDE/mutinfo.distributions.base.CorrelatedStudent/model=mlp/type=c/X_dim=1__Y_dim=1/dof=2/N=500/n_runs=10/target_mutinfo=9.0
/home/foresti/minde/minde/evaluate/outputs/2025-03-03/minde.minde.MINDE/mutinfo.distributions.base.CorrelatedStudent/model=mlp/type=c/X_dim=1__Y_dim=1/dof=2/N=500/n_runs=10/target_mutinfo=10.0
/home/foresti/minde/minde/evaluate/outputs/2025-03-03/minde.minde.MINDE/mutinfo.distributions.base.CorrelatedStudent/model=mlp/type=c/X_dim=1__Y_dim=1/dof=2/N=1000/n_runs=10/target_mutinfo

In [None]:
import os
import yaml
import csv
from itertools import product

def find_directories_with_hydra_and_results(root_dir):
    """List every directory under ``root_dir`` that holds both ``.hydra`` and ``results.yaml``.

    Note: an earlier cell defines a function with this same name that returns
    two lists; once this cell runs, this single-list version is the one in scope.

    Args:
        root_dir: Root of the directory tree, traversed with ``os.walk``.

    Returns:
        List of directory paths, in ``os.walk`` order, that contain a
        ``.hydra`` subdirectory and a ``results.yaml`` file.
    """
    return [
        current_dir
        for current_dir, subdirs, files in os.walk(root_dir)
        if '.hydra' in subdirs and 'results.yaml' in files
    ]

def extract_information(dirpath):
    """Flatten the settings and mutual-information statistics of one run directory.

    Reads ``<dirpath>/.hydra/config.yaml`` and ``<dirpath>/results.yaml`` with
    ``yaml.safe_load`` and copies the fields of interest into a single flat dict.

    Args:
        dirpath: Directory containing ``.hydra/config.yaml`` and ``results.yaml``.

    Returns:
        Dict with the keys ``dimensionality``, ``degrees_of_freedom``,
        ``mutual_information``, ``type``, ``n_samples`` (taken from the config)
        and ``mean_mi``, ``mean_mi_sigma``, ``std_mi``, ``std_mi_sigma``
        (taken from the results), in that insertion order.

    Raises:
        OSError: If either file cannot be opened.
        KeyError: If an expected key is absent from the loaded mappings.
    """
    hydra_config_file = os.path.join(dirpath, '.hydra', 'config.yaml')
    run_results_file = os.path.join(dirpath, 'results.yaml')

    with open(hydra_config_file, 'r') as handle:
        config = yaml.safe_load(handle)
    with open(run_results_file, 'r') as handle:
        results = yaml.safe_load(handle)

    # Fields are read in the same order as they appear in the output dict.
    info = {'dimensionality': config['dimensionality']}

    distribution = config['distribution']
    info['degrees_of_freedom'] = distribution['degrees_of_freedom']
    info['mutual_information'] = distribution['mutual_information']
    info['type'] = config['estimator']['args']['inference']['type']
    info['n_samples'] = config['n_samples']

    mean_stats = results['mutual_information']['mean']
    info['mean_mi'] = mean_stats['mi']
    info['mean_mi_sigma'] = mean_stats['mi_sigma']

    std_stats = results['mutual_information']['std']
    info['std_mi'] = std_stats['mi']
    info['std_mi_sigma'] = std_stats['mi_sigma']

    return info

def save_to_csv(data, output_file):
    """Write a collection of flat dicts to ``output_file`` as CSV.

    The header row is taken from the keys of the first record, in that
    record's key order.

    Args:
        data: Iterable of dicts, one per CSV row. May be empty.
        output_file: Path of the CSV file to create or overwrite.

    Raises:
        ValueError: Raised by ``csv.DictWriter`` if a later record contains a
            key that the first record does not have.
    """
    # Materialise once so any iterable (not only indexable sequences) works.
    rows = list(data)

    with open(output_file, 'w', newline='') as csvfile:
        if not rows:
            # No records means no header can be derived. Leave an empty file
            # rather than raising IndexError; truncating also avoids a stale
            # CSV from a previous run being mistaken for current results.
            return
        writer = csv.DictWriter(csvfile, fieldnames=list(rows[0].keys()))
        writer.writeheader()
        writer.writerows(rows)

def check_completeness(data, all_combinations):
    """Return the expected parameter combinations that have no record in ``data``.

    Args:
        data: Iterable of dicts, each with the keys ``dimensionality``,
            ``degrees_of_freedom``, ``mutual_information``, ``type`` and
            ``n_samples``.
        all_combinations: Set of 5-tuples in that same field order.

    Returns:
        The subset of ``all_combinations`` not matched by any record.
    """
    key_fields = (
        'dimensionality',
        'degrees_of_freedom',
        'mutual_information',
        'type',
        'n_samples',
    )

    seen = set()
    for record in data:
        seen.add(tuple(record[field] for field in key_fields))

    return all_combinations - seen

root_directory = '/home/foresti/minde/minde/evaluate/results_t_student_imp_sampl'  # Replace with your root directory path
output_csv = '/home/foresti/minde/results_prova.csv'  # Replace with your desired output CSV file path

# Build one flat record per directory that has both .hydra and results.yaml,
# then persist all records to a single CSV file.
directories = find_directories_with_hydra_and_results(root_directory)
data = [extract_information(dirpath) for dirpath in directories]

save_to_csv(data, output_csv)

# Define all possible combinations
mutual_information_values = range(1,11)  # Add all possible values
dimensionality_values = range(1, 4)
degrees_of_freedom_values = [1,2,3,4,5]  # Add all possible values
type_values = ['j', 'c']  # Add all possible values
n_samples_values = [100, 500, 1000, 2000, 5000]  # Add all possible values

# Tuple field order must match the one used by check_completeness:
# (dimensionality, degrees_of_freedom, mutual_information, type, n_samples).
all_combinations = set(product(dimensionality_values, degrees_of_freedom_values, mutual_information_values, type_values, n_samples_values))

missing_combinations = check_completeness(data, all_combinations)

if missing_combinations:
    print("Missing combinations:")
    # A set of tuples containing strings iterates in a hash-dependent order
    # that can change between kernel restarts; sort so the report is
    # reproducible and easy to scan.
    for combination in sorted(missing_combinations):
        print(combination)
else:
    print("All combinations are present.")

print(f"Information saved to {output_csv}")

Missing combinations:
(2, 3, 0, 'j', 100)
(3, 5, 0, 'j', 100)
(3, 2, 0, 'c', 5000)
(3, 1, 0, 'c', 500)
(3, 5, 0, 'c', 2000)
(3, 1, 0, 'c', 100)
(1, 5, 0, 'j', 1000)
(1, 2, 0, 'j', 1000)
(1, 5, 0, 'c', 2000)
(2, 1, 0, 'j', 500)
(3, 3, 0, 'c', 500)
(1, 4, 0, 'j', 100)
(1, 1, 0, 'j', 100)
(2, 1, 0, 'j', 100)
(2, 5, 0, 'c', 5000)
(3, 3, 0, 'c', 100)
(1, 3, 0, 'c', 500)
(2, 2, 0, 'j', 500)
(1, 3, 0, 'c', 1000)
(3, 1, 0, 'j', 2000)
(2, 5, 0, 'c', 1000)
(1, 2, 0, 'c', 2000)
(1, 3, 0, 'j', 5000)
(2, 2, 0, 'j', 1000)
(3, 4, 0, 'c', 1000)
(2, 4, 0, 'j', 5000)
(1, 3, 0, 'j', 1000)
(2, 4, 0, 'j', 1000)
(3, 5, 0, 'c', 500)
(3, 4, 0, 'j', 2000)
(2, 2, 0, 'c', 2000)
(1, 5, 0, 'c', 500)
(2, 3, 0, 'j', 500)
(3, 5, 0, 'c', 100)
(3, 5, 0, 'j', 500)
(3, 2, 0, 'j', 500)
(2, 3, 0, 'j', 1000)
(1, 5, 0, 'c', 100)
(2, 4, 0, 'c', 2000)
(3, 2, 0, 'j', 100)
(3, 1, 0, 'c', 5000)
(3, 1, 0, 'j', 500)
(1, 5, 0, 'j', 5000)
(1, 2, 0, 'c', 500)
(1, 2, 0, 'j', 5000)
(3, 1, 0, 'c', 1000)
(2, 1, 0, 'j', 5000)
(3, 1, 0, 'j'

In [3]:
# For each mutual-information value 1..10, report how many missing
# combinations carry that value in tuple position 2.
for i in range(1, 11):
    n_missing = sum(1 for combo in missing_combinations if combo[2] == i)
    print(i, ":", n_missing)

1 : 0
2 : 0
3 : 0
4 : 0
5 : 0
6 : 0
7 : 0
8 : 0
9 : 0
10 : 0
