In [1]:
from pathlib import Path

import numpy as np
import pandas as pd

In [3]:
# Root folder that holds the per-run dataset folders matched below.
base_dir = Path('/home/jaalbers/software/manada/datasets/')
# Sort so the merge order (and hence the numbering assigned to the images) is
# reproducible: glob yields entries in arbitrary, file-system-dependent order.
# Non-directories are skipped so a stray file matching the pattern is not
# treated as a dataset folder.
source_dirs = sorted(p for p in base_dir.glob('nosub_*') if p.is_dir())

# Destination of the merged dataset. The name 'nosub' has no underscore, so it
# does not match 'nosub_*' and is never picked up as a source folder.
dest_dir = base_dir / 'nosub'
dest_dir.mkdir(exist_ok=True)
print(source_dirs)

[PosixPath('/home/jaalbers/software/manada/datasets/nosub_0'), PosixPath('/home/jaalbers/software/manada/datasets/nosub_1'), PosixPath('/home/jaalbers/software/manada/datasets/nosub_2'), PosixPath('/home/jaalbers/software/manada/datasets/nosub_3'), PosixPath('/home/jaalbers/software/manada/datasets/nosub_4')]


In [4]:
# Merge every folder in `source_dirs` into `dest_dir`: images are renumbered
# consecutively across folders and the metadata tables are concatenated.
n_stored = 0          # number of images assigned a new name so far
dfs = []              # per-folder metadata, in merge order
planned_moves = []    # (old_path, new_path) pairs, executed only after validation

# Pass 1: read and validate everything without touching any image file, so a
# problem in a later folder cannot leave the dataset half-merged.
for folder in source_dirs:
    print(folder)
    # Read metadata
    folder_df = pd.read_csv(folder / 'metadata.csv', index_col=False)
    if 'index' in folder_df.columns:
        del folder_df['index']

    # Read image filenames; metadata rows are paired with images by sorted
    # filename order, so the counts must agree.
    image_files = sorted(folder.glob('*.npy'))
    assert len(image_files) == len(folder_df), (
        f'{folder}: {len(image_files)} images but {len(folder_df)} metadata rows')

    # New names continue the numbering from the previous folders
    folder_df['filename'] = [
        f'image_{i:07d}.npy'
        for i in range(n_stored, n_stored + len(folder_df))]
    for old_path, new_name in zip(image_files, folder_df['filename'].values):
        new_path = dest_dir / new_name
        # Path.rename silently replaces an existing file on POSIX systems
        if new_path.exists():
            raise FileExistsError(
                f'{new_path} already exists; refusing to overwrite it')
        planned_moves.append((old_path, new_path))

    # Store results
    n_stored += len(folder_df)
    dfs.append(folder_df)

if not dfs:
    raise ValueError('source_dirs is empty; there is nothing to merge')

# Pass 2: all folders validated, now actually move the images
for old_path, new_path in planned_moves:
    old_path.rename(new_path)

df = pd.concat(dfs, ignore_index=True)
df.to_csv(dest_dir / 'metadata.csv', index_label='index')

/home/jaalbers/software/manada/datasets/nosub_0
/home/jaalbers/software/manada/datasets/nosub_1
/home/jaalbers/software/manada/datasets/nosub_2
/home/jaalbers/software/manada/datasets/nosub_3
/home/jaalbers/software/manada/datasets/nosub_4


Check that the merged dataset contains no repeated samples (e.g. because several runs used the same random seed)

In [5]:
# Values of one metadata column across all merged rows, used below to look for
# repeated samples.
x = df.loc[:, 'main_deflector_parameters_theta_E'].values

In [6]:
# Every value should be distinct. Fail loudly on a re-run instead of relying on
# a visual comparison of the two counts, then display them as before.
n_unique, n_total = np.unique(x).size, len(x)
assert n_unique == n_total, (
    f'only {n_unique} distinct values among {n_total} rows')
n_unique, n_total

(25000, 25000)

In [7]:
# Show ten values from the start and ten starting at offset 3000 for a visual
# side-by-side comparison.
tuple(x[start:start + 10] for start in (0, 3000))

(array([1.0618936 , 0.97512035, 1.06863097, 0.95450313, 1.18826722,
        0.95076277, 1.0196983 , 1.32589125, 1.08526278, 1.0975628 ]),
 array([1.13509157, 1.60515931, 1.14652177, 1.07673084, 1.36176751,
        1.07313012, 1.04047376, 1.0566274 , 1.03711726, 1.12935627]))