In [1]:
###%pip install wave numpy pathlib

In [2]:
import os
import wave
import numpy as np
from pathlib import Path

In [3]:
def sanitize_filename(filename):
    # Replace spaces with underscores and remove other problematic characters if necessary
    sanitized = "".join(char if char.isalnum() or char in "._-" else "_" for char in filename)
    return sanitized if sanitized else "default_filename"

In [4]:
def read_wav(filename):
    with wave.open(filename, 'r') as wav:
        n_frames = wav.getnframes()
        audio_bytes = wav.readframes(n_frames)
        if wav.getsampwidth() == 3:  # 24-bit audio
            # Convert byte data to 24-bit integers
            audio = np.frombuffer(audio_bytes, dtype=np.uint8).reshape(-1, 3)
            audio = audio[:, 0] + (audio[:, 1] << 8) + (audio[:, 2] << 16)
            if audio.max() > 0x7FFFFF:  # Adjusting sign if needed
                audio = audio - 0x1000000
        else:  # Other bit depths, directly compatible
            audio = np.frombuffer(audio_bytes, dtype=np.int16)
    return audio, wav.getparams()

def convert_to_16bit(audio, params):
    # Right shift to go from 24-bit to 16-bit
    audio = (audio >> 8).astype(np.int16)
    return audio

def save_wav(audio, params, filename):
###    filename = sanitize_filename(filename)
    os.makedirs(os.path.dirname(filename), exist_ok=True)
    with wave.open(filename, 'w') as wav:
        wav.setparams(params)
        wav.writeframes(audio.tobytes())

In [5]:
def process_wav_file(input_filename, output_filename):
    audio, params = read_wav(input_filename)
    if params.sampwidth == 3:  # 24-bit audio
        audio = convert_to_16bit(audio, params)
        # Update parameters for 16-bit
        new_params = wave._wave_params(params.nchannels, 2, params.framerate, params.nframes, params.comptype, params.compname)
        save_wav(audio, new_params, output_filename)
        print(f"Converted {input_filename} to 16-bit and saved as {output_filename}")
    else:
        print(f"{input_filename} is not a 24-bit file, no conversion necessary")

In [9]:
def process_directory(input_dir, output_dir):
    input_path = Path(input_dir)
    for wav_file in input_path.rglob('*.wav'):
        relative_path = wav_file.relative_to(input_path)
        sanitized_relative_path = Path(*[sanitize_filename(part) for part in relative_path.parts])
        output_path = Path(output_dir) / sanitized_relative_path
        process_wav_file(str(wav_file), str(output_path))

In [10]:
input_dir = 'audio-sample-organizer/data/unparsed'
output_dir = 'audio-sample-organizer/data/parsed'
process_directory(input_dir, output_dir)

audio-sample-organizer/data/unparsed/Snares/Dead_Snare_Classic (8).wav is not a 24-bit file, no conversion necessary
Converted audio-sample-organizer/data/unparsed/Snares/ALC_Snare_01.wav to 16-bit and saved as audio-sample-organizer/data/parsed/Snares/ALC_Snare_01.wav
Converted audio-sample-organizer/data/unparsed/Snares/ALC_Snare_03.wav to 16-bit and saved as audio-sample-organizer/data/parsed/Snares/ALC_Snare_03.wav
Converted audio-sample-organizer/data/unparsed/Snares/Snare 1.wav to 16-bit and saved as audio-sample-organizer/data/parsed/Snares/Snare_1.wav
Converted audio-sample-organizer/data/unparsed/Snares/ALC_Snare_10.wav to 16-bit and saved as audio-sample-organizer/data/parsed/Snares/ALC_Snare_10.wav
audio-sample-organizer/data/unparsed/Snares/Dead_Snare_Classic (3).wav is not a 24-bit file, no conversion necessary
Converted audio-sample-organizer/data/unparsed/Snares/OS_CES_Snare 2.wav to 16-bit and saved as audio-sample-organizer/data/parsed/Snares/OS_CES_Snare_2.wav
Convert