In [1]:
import os
import numpy as np
from shutil import copytree
dataset_output = '/scratch/ci411/SELD_Datasets'

In [2]:
def generate_dataset(name, output_dir=dataset_output):
    """Create the directory skeleton for a dataset and return its path.

    Ensures that ``<output_dir>/<name>`` exists and contains the three
    sub-directories ``mic_dev``, ``foa_dev`` and ``metadata_dev``.
    Directories that already exist are left untouched, so the call is safe
    to repeat.

    Args:
        name: Name of the dataset; used as the directory name.
        output_dir: Parent directory in which the dataset directory is
            created. Missing parent directories are created as well.

    Returns:
        The path of the dataset directory (``os.path.join(output_dir, name)``).

    Raises:
        FileExistsError: If one of the required paths exists but is not a
            directory.
    """
    dataset_dir = os.path.join(output_dir, name)
    # makedirs(exist_ok=True) replaces the exists()/mkdir() pair: it has no
    # check-then-create race and also works when output_dir is missing.
    os.makedirs(dataset_dir, exist_ok=True)
    for directory in ('mic_dev', 'foa_dev', 'metadata_dev'):
        os.makedirs(os.path.join(dataset_dir, directory), exist_ok=True)
    print(f"Dataset created at {dataset_dir}")
    return dataset_dir

In [3]:
def check_folds(root):
    """Collect the fold numbers encoded in the file names under ``root``.

    Walks ``root`` recursively. For every file, the character at index 4 of
    the file name is read as the fold number (presumably names follow a
    ``foldN_...`` pattern -- verify against the datasets). Files whose name
    is shorter than five characters, or whose fifth character is not a
    decimal digit, are skipped rather than aborting the whole scan.

    Only a single character is read, so a fold number of 10 or more would
    be reported as its first digit.

    One line per visited directory is printed, listing the folds found
    directly in that directory in ascending order.

    Args:
        root: Directory to scan.

    Returns:
        A set of ints with every fold number found anywhere under ``root``.
        Empty if ``root`` does not exist or holds no matching files.
    """
    all_folds = set()
    for path, _subdirs, files in os.walk(root):
        # Guard the index and the int() conversion so stray files (hidden
        # files, READMEs, ...) do not raise IndexError / ValueError.
        folds = sorted({
            int(file[4])
            for file in files
            if len(file) > 4 and file[4].isdecimal()
        })
        print(f"Folds {folds} in {path}")
        all_folds.update(folds)
    return all_folds

In [4]:
# List the fold numbers found in the STARSS2022 microphone-array audio tree.
check_folds('/scratch/ci411/STARSS2022/mic_dev')

Folds [] in /scratch/ci411/STARSS2022/mic_dev
Folds [4] in /scratch/ci411/STARSS2022/mic_dev/dev-test-tau
Folds [3] in /scratch/ci411/STARSS2022/mic_dev/dev-train-tau
Folds [4] in /scratch/ci411/STARSS2022/mic_dev/dev-test-sony
Folds [3] in /scratch/ci411/STARSS2022/mic_dev/dev-train-sony


{3, 4}

In [5]:
# List the fold numbers found in the tau_fsd50k_sim metadata directory.
check_folds('/scratch/ci411/DCASE_GEN/mixtures/tau_fsd50k_sim/metadata')

Folds [1, 2] in /scratch/ci411/DCASE_GEN/mixtures/tau_fsd50k_sim/metadata


{1, 2}

In [6]:
def move_data(set_name, source_aud, source_meta, dataset_target):
    """Copy one audio/metadata pair of directories into a dataset.

    Despite the name, nothing is moved: both source trees are copied with
    ``copytree`` and merged into any existing destination directory.

    Before copying, the entries of ``source_aud`` and ``source_meta`` are
    compared by the part of each name before its first ``.``; the two
    sorted lists must be identical. The folds already present under
    ``<dataset_target>/mic_dev`` are then compared with the folds in
    ``source_aud`` and a warning is printed when they overlap.

    Args:
        set_name: Sub-directory name used under both ``mic_dev`` and
            ``metadata_dev`` of the target dataset.
        source_aud: Directory holding the audio files to copy.
        source_meta: Directory holding the matching metadata files.
        dataset_target: Root directory of the dataset being assembled.

    Raises:
        AssertionError: If the audio and metadata name lists differ.
    """
    audio_stems = sorted(entry.split('.')[0] for entry in os.listdir(source_aud))
    meta_stems = sorted(entry.split('.')[0] for entry in os.listdir(source_meta))
    assert audio_stems == meta_stems

    # Scan the target first, then the source, so the log lines keep their order.
    mic_root = os.path.join(dataset_target, 'mic_dev')
    existing_folds = check_folds(mic_root)
    adding_folds = check_folds(source_aud)
    if adding_folds & existing_folds:
        print(f"WARNING: Mixing folds from source:{adding_folds} and target:{existing_folds} datasets")

    copy_jobs = (
        (source_aud, os.path.join(mic_root, set_name)),
        (source_meta, os.path.join(dataset_target, 'metadata_dev', set_name)),
    )
    for origin, destination in copy_jobs:
        print(f"Copying from {origin} to {destination}")
        copytree(origin, destination, dirs_exist_ok=True)
    

In [15]:
# Assemble the 'TNR_0518' dataset from the nigens_tau_real mixtures plus the
# four STARSS2022 development splits.
dataset_name = 'TNR_0518'
dataset_dir = generate_dataset(dataset_name)

# Audio (target_noiseless/mic) and metadata directories of the nigens_tau_real mixtures.
tnr_aud = '/scratch/ci411/DCASE_GEN/mixtures/nigens_tau_real/target_noiseless/mic'
tnr_meta = '/scratch/ci411/DCASE_GEN/mixtures/nigens_tau_real/metadata'
move_data('tnr_real', tnr_aud, tnr_meta, dataset_dir)

# STARSS2022 split folders; each name is joined onto both roots below.
starss_subfolders = ['dev-test-sony','dev-train-sony','dev-test-tau','dev-train-tau']
starss_aud_dir = '/scratch/ci411/STARSS2022/mic_dev'
starss_meta_dir = '/scratch/ci411/STARSS2022/metadata_dev'

# All four splits are copied into the same 'STARSS2022' set folder.
for subfolder in starss_subfolders:
    aud_dir = os.path.join(starss_aud_dir, subfolder)
    meta_dir = os.path.join(starss_meta_dir, subfolder)
    move_data('STARSS2022', aud_dir, meta_dir, dataset_dir)

Dataset created at /scratch/ci411/SELD_Datasets/TNR_0518
Folds [] in /scratch/ci411/SELD_Datasets/TNR_0518/mic_dev
Folds [1, 2] in /scratch/ci411/SELD_Datasets/TNR_0518/mic_dev/tnr_real
Folds [3, 4] in /scratch/ci411/SELD_Datasets/TNR_0518/mic_dev/STARSS2022
Folds [1, 2] in /scratch/ci411/DCASE_GEN/mixtures/nigens_tau_real/target_noiseless/mic
Copying from /scratch/ci411/DCASE_GEN/mixtures/nigens_tau_real/target_noiseless/mic to /scratch/ci411/SELD_Datasets/TNR_0518/mic_dev/tnr_real
Copying from /scratch/ci411/DCASE_GEN/mixtures/nigens_tau_real/metadata to /scratch/ci411/SELD_Datasets/TNR_0518/metadata_dev/tnr_real
Folds [] in /scratch/ci411/SELD_Datasets/TNR_0518/mic_dev
Folds [1, 2] in /scratch/ci411/SELD_Datasets/TNR_0518/mic_dev/tnr_real
Folds [3, 4] in /scratch/ci411/SELD_Datasets/TNR_0518/mic_dev/STARSS2022
Folds [4] in /scratch/ci411/STARSS2022/mic_dev/dev-test-sony
Copying from /scratch/ci411/STARSS2022/mic_dev/dev-test-sony to /scratch/ci411/SELD_Datasets/TNR_0518/mic_dev/STAR

In [12]:
# Assemble the 'TNS_0518' dataset from the nigens_tau_sim mixtures plus the
# four STARSS2022 development splits.
# NOTE(review): the recorded output of this cell refers to 'TNR_Simulated',
# not 'TNS_0518' -- the output appears to predate the current dataset_name;
# re-run to confirm.
dataset_name = 'TNS_0518'
dataset_dir = generate_dataset(dataset_name)

# NOTE(review): the variables are named tnr_* and the set name is 'tnr_real'
# although the source is nigens_tau_sim -- possibly a copy-paste leftover;
# confirm the intended set name before relying on the folder layout.
tnr_aud = '/scratch/ci411/DCASE_GEN/mixtures/nigens_tau_sim/target_noiseless/mic'
tnr_meta = '/scratch/ci411/DCASE_GEN/mixtures/nigens_tau_sim/metadata'
move_data('tnr_real', tnr_aud, tnr_meta, dataset_dir)

# STARSS2022 split folders; each name is joined onto both roots below.
starss_subfolders = ['dev-test-sony','dev-train-sony','dev-test-tau','dev-train-tau']
starss_aud_dir = '/scratch/ci411/STARSS2022/mic_dev'
starss_meta_dir = '/scratch/ci411/STARSS2022/metadata_dev'

# All four splits are copied into the same 'STARSS2022' set folder.
for subfolder in starss_subfolders:
    aud_dir = os.path.join(starss_aud_dir, subfolder)
    meta_dir = os.path.join(starss_meta_dir, subfolder)
    move_data('STARSS2022', aud_dir, meta_dir, dataset_dir)

Dataset created at /scratch/ci411/SELD_Datasets/TNR_Simulated
Folds [] in /scratch/ci411/SELD_Datasets/TNR_Simulated/mic_dev
Folds [1, 2] in /scratch/ci411/DCASE_GEN/mixtures/nigens_tau_sim/target_noiseless/mic
Copying from /scratch/ci411/DCASE_GEN/mixtures/nigens_tau_sim/target_noiseless/mic to /scratch/ci411/SELD_Datasets/TNR_Simulated/mic_dev/tnr_real
Copying from /scratch/ci411/DCASE_GEN/mixtures/nigens_tau_sim/metadata to /scratch/ci411/SELD_Datasets/TNR_Simulated/metadata_dev/tnr_real
Folds [] in /scratch/ci411/SELD_Datasets/TNR_Simulated/mic_dev
Folds [1, 2] in /scratch/ci411/SELD_Datasets/TNR_Simulated/mic_dev/tnr_real
Folds [4] in /scratch/ci411/STARSS2022/mic_dev/dev-test-sony
Copying from /scratch/ci411/STARSS2022/mic_dev/dev-test-sony to /scratch/ci411/SELD_Datasets/TNR_Simulated/mic_dev/STARSS2022
Copying from /scratch/ci411/STARSS2022/metadata_dev/dev-test-sony to /scratch/ci411/SELD_Datasets/TNR_Simulated/metadata_dev/STARSS2022
Folds [] in /scratch/ci411/SELD_Datasets/T

In [9]:
# Assemble the 'TFR_0527' dataset from the tau_fsd50k_real mixtures plus the
# four STARSS2022 development splits.
dataset_name = 'TFR_0527'
dataset_dir = generate_dataset(dataset_name)

# Audio (target_noiseless/mic) and metadata directories of the tau_fsd50k_real mixtures.
tfr_aud = '/scratch/ci411/DCASE_GEN/mixtures/tau_fsd50k_real/target_noiseless/mic'
tfr_meta = '/scratch/ci411/DCASE_GEN/mixtures/tau_fsd50k_real/metadata'
move_data('tfr_real', tfr_aud, tfr_meta, dataset_dir)

# STARSS2022 split folders; each name is joined onto both roots below.
starss_subfolders = ['dev-test-sony','dev-train-sony','dev-test-tau','dev-train-tau']
starss_aud_dir = '/scratch/ci411/STARSS2022/mic_dev'
starss_meta_dir = '/scratch/ci411/STARSS2022/metadata_dev'

# All four splits are copied into the same 'STARSS2022' set folder.
for subfolder in starss_subfolders:
    aud_dir = os.path.join(starss_aud_dir, subfolder)
    meta_dir = os.path.join(starss_meta_dir, subfolder)
    move_data('STARSS2022', aud_dir, meta_dir, dataset_dir)

Dataset created at /scratch/ci411/SELD_Datasets/TFR_0527
Folds [] in /scratch/ci411/SELD_Datasets/TFR_0527/mic_dev
Folds [1, 2] in /scratch/ci411/SELD_Datasets/TFR_0527/mic_dev/tnr_real
Folds [1, 2] in /scratch/ci411/DCASE_GEN/mixtures/tau_fsd50k_real/target_noiseless/mic
Copying from /scratch/ci411/DCASE_GEN/mixtures/tau_fsd50k_real/target_noiseless/mic to /scratch/ci411/SELD_Datasets/TFR_0527/mic_dev/tfr_real
Copying from /scratch/ci411/DCASE_GEN/mixtures/tau_fsd50k_real/metadata to /scratch/ci411/SELD_Datasets/TFR_0527/metadata_dev/tfr_real
Folds [] in /scratch/ci411/SELD_Datasets/TFR_0527/mic_dev
Folds [1, 2] in /scratch/ci411/SELD_Datasets/TFR_0527/mic_dev/tnr_real
Folds [1, 2] in /scratch/ci411/SELD_Datasets/TFR_0527/mic_dev/tfr_real
Folds [4] in /scratch/ci411/STARSS2022/mic_dev/dev-test-sony
Copying from /scratch/ci411/STARSS2022/mic_dev/dev-test-sony to /scratch/ci411/SELD_Datasets/TFR_0527/mic_dev/STARSS2022
Copying from /scratch/ci411/STARSS2022/metadata_dev/dev-test-sony to

In [10]:
# Assemble the 'TFS_0527' dataset from the tau_fsd50k_sim mixtures plus the
# four STARSS2022 development splits.
dataset_name = 'TFS_0527'
dataset_dir = generate_dataset(dataset_name)

# NOTE(review): the set name is 'tfs_real' although the source is
# tau_fsd50k_sim -- possibly a copy-paste leftover; confirm the intended name.
tfs_aud = '/scratch/ci411/DCASE_GEN/mixtures/tau_fsd50k_sim/target_noiseless/mic'
tfs_meta = '/scratch/ci411/DCASE_GEN/mixtures/tau_fsd50k_sim/metadata'
move_data('tfs_real', tfs_aud, tfs_meta, dataset_dir)

# STARSS2022 split folders; each name is joined onto both roots below.
starss_subfolders = ['dev-test-sony','dev-train-sony','dev-test-tau','dev-train-tau']
starss_aud_dir = '/scratch/ci411/STARSS2022/mic_dev'
starss_meta_dir = '/scratch/ci411/STARSS2022/metadata_dev'

# All four splits are copied into the same 'STARSS2022' set folder.
for subfolder in starss_subfolders:
    aud_dir = os.path.join(starss_aud_dir, subfolder)
    meta_dir = os.path.join(starss_meta_dir, subfolder)
    move_data('STARSS2022', aud_dir, meta_dir, dataset_dir)

Dataset created at /scratch/ci411/SELD_Datasets/TFS_0527
Folds [] in /scratch/ci411/SELD_Datasets/TFS_0527/mic_dev
Folds [1, 2] in /scratch/ci411/DCASE_GEN/mixtures/tau_fsd50k_sim/target_noiseless/mic
Copying from /scratch/ci411/DCASE_GEN/mixtures/tau_fsd50k_sim/target_noiseless/mic to /scratch/ci411/SELD_Datasets/TFS_0527/mic_dev/tfs_real
Copying from /scratch/ci411/DCASE_GEN/mixtures/tau_fsd50k_sim/metadata to /scratch/ci411/SELD_Datasets/TFS_0527/metadata_dev/tfs_real
Folds [] in /scratch/ci411/SELD_Datasets/TFS_0527/mic_dev
Folds [1, 2] in /scratch/ci411/SELD_Datasets/TFS_0527/mic_dev/tfs_real
Folds [4] in /scratch/ci411/STARSS2022/mic_dev/dev-test-sony
Copying from /scratch/ci411/STARSS2022/mic_dev/dev-test-sony to /scratch/ci411/SELD_Datasets/TFS_0527/mic_dev/STARSS2022
Copying from /scratch/ci411/STARSS2022/metadata_dev/dev-test-sony to /scratch/ci411/SELD_Datasets/TFS_0527/metadata_dev/STARSS2022
Folds [] in /scratch/ci411/SELD_Datasets/TFS_0527/mic_dev
Folds [4] in /scratch/ci

In [7]:
# Assemble the 'TFR_0601' dataset from the tau_fsd50k_real mixtures plus the
# four STARSS2022 development splits.
dataset_name = 'TFR_0601'
dataset_dir = generate_dataset(dataset_name)

# Audio (target_noiseless/mic) and metadata directories of the tau_fsd50k_real mixtures.
tfr_aud = '/scratch/ci411/DCASE_GEN/mixtures/tau_fsd50k_real/target_noiseless/mic'
tfr_meta = '/scratch/ci411/DCASE_GEN/mixtures/tau_fsd50k_real/metadata'
move_data('tfr_real', tfr_aud, tfr_meta, dataset_dir)

# STARSS2022 split folders; each name is joined onto both roots below.
starss_subfolders = ['dev-test-sony','dev-train-sony','dev-test-tau','dev-train-tau']
starss_aud_dir = '/scratch/ci411/STARSS2022/mic_dev'
starss_meta_dir = '/scratch/ci411/STARSS2022/metadata_dev'

# All four splits are copied into the same 'STARSS2022' set folder.
for subfolder in starss_subfolders:
    aud_dir = os.path.join(starss_aud_dir, subfolder)
    meta_dir = os.path.join(starss_meta_dir, subfolder)
    move_data('STARSS2022', aud_dir, meta_dir, dataset_dir)

Dataset created at /scratch/ci411/SELD_Datasets/TFR_0601
Folds [] in /scratch/ci411/SELD_Datasets/TFR_0601/mic_dev
Folds [1, 2] in /scratch/ci411/DCASE_GEN/mixtures/tau_fsd50k_real/target_noiseless/mic
Copying from /scratch/ci411/DCASE_GEN/mixtures/tau_fsd50k_real/target_noiseless/mic to /scratch/ci411/SELD_Datasets/TFR_0601/mic_dev/tfr_real
Copying from /scratch/ci411/DCASE_GEN/mixtures/tau_fsd50k_real/metadata to /scratch/ci411/SELD_Datasets/TFR_0601/metadata_dev/tfr_real
Folds [] in /scratch/ci411/SELD_Datasets/TFR_0601/mic_dev
Folds [1, 2] in /scratch/ci411/SELD_Datasets/TFR_0601/mic_dev/tfr_real
Folds [4] in /scratch/ci411/STARSS2022/mic_dev/dev-test-sony
Copying from /scratch/ci411/STARSS2022/mic_dev/dev-test-sony to /scratch/ci411/SELD_Datasets/TFR_0601/mic_dev/STARSS2022
Copying from /scratch/ci411/STARSS2022/metadata_dev/dev-test-sony to /scratch/ci411/SELD_Datasets/TFR_0601/metadata_dev/STARSS2022
Folds [] in /scratch/ci411/SELD_Datasets/TFR_0601/mic_dev
Folds [4] in /scratch

In [8]:
# Assemble the 'TFS_0601' dataset from the tau_fsd50k_sim mixtures plus the
# four STARSS2022 development splits.
dataset_name = 'TFS_0601'
dataset_dir = generate_dataset(dataset_name)

# NOTE(review): the set name is 'tfs_real' although the source is
# tau_fsd50k_sim -- possibly a copy-paste leftover; confirm the intended name.
tfs_aud = '/scratch/ci411/DCASE_GEN/mixtures/tau_fsd50k_sim/target_noiseless/mic'
tfs_meta = '/scratch/ci411/DCASE_GEN/mixtures/tau_fsd50k_sim/metadata'
move_data('tfs_real', tfs_aud, tfs_meta, dataset_dir)

# STARSS2022 split folders; each name is joined onto both roots below.
starss_subfolders = ['dev-test-sony','dev-train-sony','dev-test-tau','dev-train-tau']
starss_aud_dir = '/scratch/ci411/STARSS2022/mic_dev'
starss_meta_dir = '/scratch/ci411/STARSS2022/metadata_dev'

# All four splits are copied into the same 'STARSS2022' set folder.
for subfolder in starss_subfolders:
    aud_dir = os.path.join(starss_aud_dir, subfolder)
    meta_dir = os.path.join(starss_meta_dir, subfolder)
    move_data('STARSS2022', aud_dir, meta_dir, dataset_dir)

Dataset created at /scratch/ci411/SELD_Datasets/TFS_0601
Folds [] in /scratch/ci411/SELD_Datasets/TFS_0601/mic_dev
Folds [1, 2] in /scratch/ci411/DCASE_GEN/mixtures/tau_fsd50k_sim/target_noiseless/mic
Copying from /scratch/ci411/DCASE_GEN/mixtures/tau_fsd50k_sim/target_noiseless/mic to /scratch/ci411/SELD_Datasets/TFS_0601/mic_dev/tfs_real
Copying from /scratch/ci411/DCASE_GEN/mixtures/tau_fsd50k_sim/metadata to /scratch/ci411/SELD_Datasets/TFS_0601/metadata_dev/tfs_real
Folds [] in /scratch/ci411/SELD_Datasets/TFS_0601/mic_dev
Folds [1, 2] in /scratch/ci411/SELD_Datasets/TFS_0601/mic_dev/tfs_real
Folds [4] in /scratch/ci411/STARSS2022/mic_dev/dev-test-sony
Copying from /scratch/ci411/STARSS2022/mic_dev/dev-test-sony to /scratch/ci411/SELD_Datasets/TFS_0601/mic_dev/STARSS2022
Copying from /scratch/ci411/STARSS2022/metadata_dev/dev-test-sony to /scratch/ci411/SELD_Datasets/TFS_0601/metadata_dev/STARSS2022
Folds [] in /scratch/ci411/SELD_Datasets/TFS_0601/mic_dev
Folds [4] in /scratch/ci