In [7]:
import os
import soundfile as sf
import numpy as np

def verify_audio_files(directory, error_log, silent_log, silence_threshold=1e-4):
    """
    Check every ``.wav`` file in ``directory`` and delete the silent ones.

    Each file is read with ``sf.read``. A file that cannot be read is
    reported (printed and written to ``error_log``) and left on disk. A file
    whose samples all have an absolute value below ``silence_threshold`` is
    deleted from disk and its path is written to ``silent_log``. Note that a
    file containing zero samples also satisfies this condition.

    Both logs are opened in append mode, so calling this function for several
    directories with the same log paths keeps the entries of every call
    instead of overwriting them. Remove or truncate the log files beforehand
    if entries from an earlier run must not be kept.

    Args:
        directory: Directory whose ``.wav`` files are checked (not recursive).
        error_log: Path of the text file receiving one line per failure.
        silent_log: Path of the text file receiving one deleted path per line.
        silence_threshold: Absolute sample value below which a sample counts
            as silent; compared against the values returned by ``sf.read``.

    Returns:
        A ``(file_count, deleted_files_count)`` tuple: the number of ``.wav``
        files examined and the number of silent files actually deleted.
    """
    problematic_files = []
    silent_files = []
    file_count = 0
    deleted_files_count = 0

    # Sorted so that the log order is reproducible between runs.
    for filename in sorted(os.listdir(directory)):
        if not filename.endswith(".wav"):
            continue

        file_count += 1
        file_path = os.path.join(directory, filename)

        try:
            # Attempt to read the audio file
            data, _samplerate = sf.read(file_path)
        except Exception as e:
            # Any failure to decode marks the file as problematic; keep going
            # so that one bad file does not abort the whole scan.
            error_message = f"Error reading {file_path}: {e}"
            print(error_message)
            problematic_files.append(error_message)
            continue

        if not np.all(np.abs(data) < silence_threshold):
            continue

        try:
            os.remove(file_path)  # Delete the silent file
        except OSError as e:
            # Report a failed deletion as such, and do not count the file as
            # deleted or list it in the silent log.
            error_message = f"Error deleting {file_path}: {e}"
            print(error_message)
            problematic_files.append(error_message)
            continue

        silent_files.append(file_path)
        deleted_files_count += 1

    # Append (never truncate) so repeated calls sharing the same log paths
    # accumulate their entries. The files are created even when empty.
    for log_path, entries in (
        (error_log, problematic_files),
        (silent_log, silent_files),
    ):
        with open(log_path, "a", encoding="utf-8") as f:
            f.writelines(f"{entry}\n" for entry in entries)

    return file_count, deleted_files_count

# Directories to check
data_dirs = [
    "/home/gridsan/abradshaw/MITBuzz/Positive_1sec_20240607",
    "/home/gridsan/abradshaw/MITBuzz/Negative_1sec_20240607"
]

# Both log files are shared by every directory processed below.
error_log = "/home/gridsan/abradshaw/MITBuzz/error_log.txt"
silent_log = "/home/gridsan/abradshaw/MITBuzz/silent_log.txt"

# Start this run with empty logs so entries left over from an earlier run
# are never mixed with the ones produced now.
for log_path in (error_log, silent_log):
    with open(log_path, "w"):
        pass

total_files = 0
total_deleted_files = 0

# Verify audio files in each directory and accumulate the per-directory counts
for data_dir in data_dirs:
    num_files, num_deleted_files = verify_audio_files(data_dir, error_log, silent_log)
    total_files += num_files
    total_deleted_files += num_deleted_files
    print(f"Number of files in {data_dir}: {num_files}")
    print(f"Number of deleted silent files in {data_dir}: {num_deleted_files}")


print(f"Total number of files in all directories: {total_files}")
print(f"Total number of deleted silent files in all directories: {total_deleted_files}")
print("Audio file verification completed. Check the error log for details.")


Number of files in /home/gridsan/abradshaw/MITBuzz/Positive_1sec_20240607: 10027
Number of deleted silent files in /home/gridsan/abradshaw/MITBuzz/Positive_1sec_20240607: 0
Number of files in /home/gridsan/abradshaw/MITBuzz/Negative_1sec_20240607: 11930
Number of deleted silent files in /home/gridsan/abradshaw/MITBuzz/Negative_1sec_20240607: 211
Total number of files in all directories: 21957
Total number of deleted silent files in all directories: 211
Audio file verification completed. Check the error log for details.
