## Remove the validated test set from the clipped files

In [5]:
import os

def delete_duplicate_files(directory_clipped, directory_valid):
    """Delete from ``directory_clipped`` every .wav entry that also exists in ``directory_valid``.

    Two entries are considered duplicates when their names are exactly equal;
    file contents are not compared. The ``.wav`` extension check is
    case-insensitive. Nothing inside ``directory_valid`` is modified.

    Args:
        directory_clipped: Directory the duplicates are removed from.
        directory_valid: Directory whose .wav names mark what to remove.

    Returns:
        None. Progress is reported on stdout: the set of matching names, one
        line per deletion or failure, and a final count of the files that
        were actually deleted.

    Raises:
        OSError: If either directory cannot be listed. Failures to delete an
            individual entry are caught and reported, not raised.
    """
    # Collect the .wav names present in each directory
    wav_files_clipped = {f for f in os.listdir(directory_clipped) if f.lower().endswith('.wav')}
    wav_files_valid = {f for f in os.listdir(directory_valid) if f.lower().endswith('.wav')}

    # Names that exist in both directories
    files_to_delete = wav_files_clipped & wav_files_valid

    print(files_to_delete)

    # Delete the matches from the clipped directory (the valid directory is left intact).
    # Iterating in sorted order keeps the log stable between runs.
    deleted_count = 0
    for file_name in sorted(files_to_delete):
        file_path = os.path.join(directory_clipped, file_name)
        try:
            os.remove(file_path)
        except OSError as e:
            print(f"Error deleting {file_path}: {e}")
        else:
            # Only successful removals are counted, so the summary below
            # cannot overstate what happened.
            deleted_count += 1
            print(f"Deleted: {file_path}")

    print(f"Deleted {deleted_count} duplicate files.")

# Example usage: remove from the clipped directory every .wav whose name
# also appears in the validated test-set directory.
delete_duplicate_files(
    directory_clipped="../Datasets/clipped-4s/",
    directory_valid="../Datasets/Valided_testset/",
)

{'other_4616.wav', 'none_1692.wav', 'Weale_225.wav', 'Eastern_446.wav', 'Rain_754.wav', 'Peninsula_55.wav', 'Banded_353.wav', 'Clicking_415.wav', 'other_2.wav', 'Eastern_497.wav', 'De_82.wav', 'Eastern_224.wav', 'Rain_550.wav', 'none_21.wav', 'Painted_38.wav', 'Banded_453.wav', 'Southern_1438.wav', 'Rain_563.wav', 'Painted_230.wav', 'other_2664.wav', 'Southern_1382.wav', 'De_103.wav', 'none_31.wav', 'Weale_274.wav', 'none_703.wav', 'Peninsula_268.wav', 'Clicking_1.wav', 'Peninsula_17.wav', 'De_124.wav', 'Rain_726.wav', 'Weale_78.wav', 'Eastern_129.wav', 'Rain_833.wav', 'Peninsula_219.wav', 'Southern_1417.wav', 'Peninsula_90.wav', 'none_1662.wav', 'Southern_239.wav', 'none_258.wav', 'Mountain_2617.wav', 'other_1033.wav', 'Mountain_2485.wav', 'Painted_90.wav', 'De_343.wav', 'Mountain_2702.wav', 'Eastern_235.wav', 'none_36.wav', 'Clicking_366.wav', 'Banded_810.wav', 'Banded_458.wav', 'none_35.wav', 'other_2642.wav', 'Southern_1507.wav', 'Peninsula_181.wav', 'Clicking_293.wav', 'De_388.wav

In [2]:
import os
import shutil

def copy_files_with_conflict_resolution(src_dir, dst_dir):
    """
    Copy every regular file in src_dir into dst_dir without overwriting anything.

    When the target name is already taken, a numeric suffix is inserted before
    the extension and incremented until a free name is found:
    - "file.txt" already in dst_dir  -> copied as "file_1.txt"
    - "file_1.txt" also taken        -> copied as "file_2.txt", and so on.

    dst_dir is created if it does not exist. Subdirectories of src_dir are
    skipped. One "Copied: ..." line is printed per file.
    """
    if not os.path.exists(dst_dir):
        os.makedirs(dst_dir)

    for entry in os.listdir(src_dir):
        source = os.path.join(src_dir, entry)
        if not os.path.isfile(source):
            # Directories and other non-file entries are ignored
            continue

        stem, suffix = os.path.splitext(entry)
        target = os.path.join(dst_dir, entry)

        # Try stem_1, stem_2, ... until the name is unused in dst_dir
        attempt = 0
        while os.path.exists(target):
            attempt += 1
            target = os.path.join(dst_dir, f"{stem}_{attempt}{suffix}")

        shutil.copy2(source, target)
        print(f"Copied: {source} -> {target}")

if __name__ == "__main__":
    # Paths for this run: where files are read from and where they are copied to
    source_directory, destination_directory = (
        "../Datasets/Raw-audio/calls-clipped/",
        "../Datasets/clipped-4s/",
    )

    copy_files_with_conflict_resolution(
        src_dir=source_directory,
        dst_dir=destination_directory,
    )
    print("Copy operation completed!")

Copied: ../Datasets/Raw-audio/calls-clipped/other_384.wav -> ../Datasets/clipped-4s/other_384_1.wav
Copied: ../Datasets/Raw-audio/calls-clipped/other_416.wav -> ../Datasets/clipped-4s/other_416_1.wav
Copied: ../Datasets/Raw-audio/calls-clipped/other_597.wav -> ../Datasets/clipped-4s/other_597_1.wav
Copied: ../Datasets/Raw-audio/calls-clipped/other_519.wav -> ../Datasets/clipped-4s/other_519_1.wav
Copied: ../Datasets/Raw-audio/calls-clipped/other_509.wav -> ../Datasets/clipped-4s/other_509_1.wav
Copied: ../Datasets/Raw-audio/calls-clipped/other_255.wav -> ../Datasets/clipped-4s/other_255_1.wav
Copied: ../Datasets/Raw-audio/calls-clipped/other_510.wav -> ../Datasets/clipped-4s/other_510_1.wav
Copied: ../Datasets/Raw-audio/calls-clipped/other_386.wav -> ../Datasets/clipped-4s/other_386_1.wav
Copied: ../Datasets/Raw-audio/calls-clipped/other_454.wav -> ../Datasets/clipped-4s/other_454_1.wav
Copied: ../Datasets/Raw-audio/calls-clipped/other_258.wav -> ../Datasets/clipped-4s/other_258_1.wav


In [None]:
import h5py

# Location of the HDF5 file to inspect
h5_file_path = "../Datasets/FD_5.1.h5"

# Open read-only and report the shape of every top-level entry
with h5py.File(h5_file_path, 'r') as hf:
    for key in hf.keys():
        print(f"Dataset '{key}' has shape: {hf[key].shape}")

Dataset 'Banded_108' has shape: (256, 526)
Dataset 'Banded_120' has shape: (256, 526)
Dataset 'Banded_123' has shape: (256, 526)
Dataset 'Banded_130' has shape: (256, 526)
Dataset 'Banded_134' has shape: (256, 526)
Dataset 'Banded_136' has shape: (256, 526)
Dataset 'Banded_190' has shape: (256, 526)
Dataset 'Banded_207' has shape: (256, 526)
Dataset 'Banded_208' has shape: (256, 526)
Dataset 'Banded_211' has shape: (256, 526)
Dataset 'Banded_226' has shape: (256, 526)
Dataset 'Banded_229' has shape: (256, 526)
Dataset 'Banded_240' has shape: (256, 526)
Dataset 'Banded_245' has shape: (256, 526)
Dataset 'Banded_249' has shape: (256, 526)
Dataset 'Banded_250' has shape: (256, 526)
Dataset 'Banded_283' has shape: (256, 526)
Dataset 'Banded_291' has shape: (256, 526)
Dataset 'Banded_316' has shape: (256, 526)
Dataset 'Banded_321' has shape: (256, 526)
Dataset 'Banded_328' has shape: (256, 526)
Dataset 'Banded_33' has shape: (256, 526)
Dataset 'Banded_331' has shape: (256, 526)
Dataset 'Ban

In [2]:
import numpy as np

# Location of the .npz shard to inspect
npz_file_path = "../Datasets/folds_10_5.0/shard0.npz"

# Open the archive and report the shape of each stored array
with np.load(npz_file_path) as data:
    for key in data.files:
        print(f"Array '{key}' has shape: {data[key].shape}")

Array 'Banded_529' has shape: (256, 132)
Array 'none_567' has shape: (256, 132)
Array 'other_4779' has shape: (256, 132)
Array 'none_1218' has shape: (256, 132)
Array 'Rain_116' has shape: (256, 132)
Array 'other_590' has shape: (256, 132)
Array 'other_531' has shape: (256, 132)
Array 'Peninsula_245' has shape: (256, 132)
Array 'Clicking_170' has shape: (256, 132)
Array 'Rain_299' has shape: (256, 132)
Array 'Mountain_1922' has shape: (256, 132)
Array 'other_3094' has shape: (256, 132)
Array 'Banded_7' has shape: (256, 132)
Array 'none_279' has shape: (256, 132)
Array 'Eastern_361' has shape: (256, 132)
Array 'other_2159' has shape: (256, 132)
Array 'other_2802' has shape: (256, 132)
Array 'Mountain_1634' has shape: (256, 132)
Array 'Rain_941' has shape: (256, 132)
Array 'Clicking_417' has shape: (256, 132)
Array 'Eastern_110' has shape: (256, 132)
Array 'none_1931' has shape: (256, 132)
Array 'none_1112' has shape: (256, 132)
Array 'Mountain_1048' has shape: (256, 132)
Array 'none_145

In [12]:
import h5py
import numpy as np
from collections import Counter

def count_classes_in_h5(h5_file_path):
    """Count how many top-level keys of an HDF5 file start with each label.

    The label of a key is the text before its first underscore (the whole key
    when it has no underscore). This assumes each key names one sample and
    carries its class label as that prefix.

    Args:
        h5_file_path: Path of the .h5 file, opened read-only.

    Returns:
        collections.Counter mapping label -> number of keys with that label.
    """
    # Only the key names are needed, so take a snapshot and close the file
    with h5py.File(h5_file_path, 'r') as h5_file:
        sample_names = list(h5_file.keys())

    tally = Counter()
    for name in sample_names:
        label = name.partition('_')[0]
        tally[label] += 1

    return tally

# Example usage: tally the labels found in the test split
h5_file_path = '../Datasets/folds_10_5.0/test.h5'  # Replace with your actual file path
class_counts = count_classes_in_h5(h5_file_path)

# One "label: count" line per class, in the order the Counter holds them
print("Class counts:")
for label in class_counts:
    print(f"{label}: {class_counts[label]}")

Class counts:
Banded: 89
Clicking: 113
De: 38
Eastern: 65
Mountain: 263
Painted: 25
Peninsula: 38
Rain: 106
Southern: 156
Weale: 35
none: 222
other: 480
