In [1]:
# Only numpy needs installing (`%pip install numpy`); `wave`, `os` and `pathlib` are part of the Python standard library.

In [2]:
import os
import wave
import numpy as np
from pathlib import Path

In [3]:
# Root folder that is searched for source WAV files, and root folder that
# receives the converted copies (both are relative paths).
input_dir, output_dir = 'data/unparsed', 'data/parsed'

In [4]:
def sanitize_filename(filename):
    """Return ``filename`` with every unsafe character replaced by ``_``.

    A character is kept when ``str.isalnum()`` is true for it or when it is
    one of ``.``, ``_`` or ``-``; anything else (spaces, brackets, path
    separators, ...) becomes a single underscore. The length of the string
    is therefore unchanged.
    """
    allowed_punctuation = "._-"
    cleaned = []
    for ch in filename:
        keep = ch.isalnum() or ch in allowed_punctuation
        cleaned.append(ch if keep else "_")
    return "".join(cleaned)

In [5]:
def read_wav(filename):
    """Read a WAV file and return its samples together with its parameters.

    Args:
        filename: Path of the WAV file to open.

    Returns:
        A ``(audio, params)`` tuple. ``audio`` is a flat 1-D NumPy array
        holding every sample returned by ``readframes`` (multi-channel data
        is not reshaped per channel). Its dtype depends on the sample width:
        ``uint8`` for 1 byte, ``int16`` for 2 bytes, ``int32`` for 4 bytes,
        and ``int32`` containing sign-extended values for 3-byte (24-bit)
        samples. ``params`` is the named tuple from ``Wave_read.getparams()``.

    Raises:
        ValueError: If the sample width is not 1, 2, 3 or 4 bytes.
    """
    with wave.open(filename, 'rb') as wav:
        params = wav.getparams()
        frames = wav.readframes(params.nframes)

    if params.sampwidth == 3:
        # Widen to int32 BEFORE shifting. Shifting a uint8 array by 8 or 16
        # bits keeps the uint8 dtype and yields 0, which would silently drop
        # the two most significant bytes of every sample.
        raw = np.frombuffer(frames, dtype=np.uint8).reshape(-1, 3).astype(np.int32)
        # Assuming little endian byte order: byte 0 is the least significant.
        audio = raw[:, 0] | (raw[:, 1] << 8) | (raw[:, 2] << 16)
        # Sign-extend: values with bit 23 set are negative in 24-bit signed PCM.
        audio[audio >= 0x800000] -= 0x1000000
        return audio, params

    # 8-bit WAV samples are unsigned; 16- and 32-bit samples are signed.
    dtype_by_width = {1: np.uint8, 2: np.int16, 4: np.int32}
    sample_dtype = dtype_by_width.get(params.sampwidth)
    if sample_dtype is None:
        raise ValueError(
            f"Unsupported sample width of {params.sampwidth} bytes in {filename}"
        )
    audio = np.frombuffer(frames, dtype=sample_dtype)
    return audio, params

def convert_to_16bit(audio):
    """Reduce 24-bit sample values to 16-bit.

    The eight least-significant bits of every value are discarded with a
    right shift and the result is cast to ``int16``.

    Args:
        audio: NumPy integer array of 24-bit sample values.

    Returns:
        A new ``int16`` array with the same shape as ``audio``.
    """
    high_bits = np.right_shift(audio, 8)
    return high_bits.astype(np.int16)

def save_wav(audio, params, filename):
    """Write ``audio`` to ``filename`` as an uncompressed 16-bit WAV file.

    Args:
        audio: NumPy array whose raw bytes (``audio.tobytes()``) are written
            as the sample data; it is expected to already be 16-bit.
        params: Parameters of the source file; only ``nchannels`` and
            ``framerate`` are reused, the sample width is forced to 2 bytes.
        filename: Destination path. Missing parent directories are created.
    """
    parent_dir = os.path.dirname(filename)
    # dirname is '' for a bare file name, and os.makedirs('') raises.
    if parent_dir:
        os.makedirs(parent_dir, exist_ok=True)

    with wave.open(filename, 'wb') as wav:
        # Public setters instead of the private wave._wave_params tuple.
        wav.setnchannels(params.nchannels)
        wav.setsampwidth(2)
        wav.setframerate(params.framerate)
        # The frame count is not set explicitly: len(audio) counts samples,
        # not frames, for multi-channel data, and the wave module derives
        # the correct value from the bytes actually written.
        wav.writeframes(audio.tobytes())

In [6]:
def process_wav_file(input_filename, output_filename):
    """Convert one WAV file to 16-bit if it is stored as 24-bit.

    A file whose sample width is 3 bytes is converted and written to
    ``output_filename``. For any other sample width only a message is
    printed; nothing is written to ``output_filename`` in that case.

    Args:
        input_filename: Path of the WAV file to read.
        output_filename: Path the converted file is saved to.
    """
    samples, wav_params = read_wav(input_filename)
    is_24_bit = wav_params.sampwidth == 3

    if not is_24_bit:
        print(f"No conversion needed for: {output_filename}")
        return

    save_wav(convert_to_16bit(samples), wav_params, output_filename)
    print(f"Converted and saved: {output_filename}")

In [7]:
def process_directory(input_dir, output_dir):
    """Process every ``*.wav`` file found recursively under ``input_dir``.

    Each file's path relative to ``input_dir`` is rebuilt under
    ``output_dir`` with every path component passed through
    ``sanitize_filename``; the source and destination paths are then handed
    to ``process_wav_file`` as strings.

    Args:
        input_dir: Root directory that is searched for WAV files.
        output_dir: Root directory for the processed files.
    """
    source_root = Path(input_dir)
    for source_file in source_root.rglob('*.wav'):
        safe_parts = [
            sanitize_filename(component)
            for component in source_file.relative_to(source_root).parts
        ]
        target_file = Path(output_dir).joinpath(*safe_parts)
        process_wav_file(str(source_file), str(target_file))

In [8]:
# Run the batch job over the configured input and output folders.
process_directory(input_dir=input_dir, output_dir=output_dir)

No conversion needed for: data/parsed/Snares/Dead_Snare_Classic__8_.wav
Converted and saved: data/parsed/Snares/ALC_Snare_01.wav
Converted and saved: data/parsed/Snares/ALC_Snare_03.wav
Converted and saved: data/parsed/Snares/Snare_1.wav
Converted and saved: data/parsed/Snares/ALC_Snare_10.wav
No conversion needed for: data/parsed/Snares/Dead_Snare_Classic__3_.wav
Converted and saved: data/parsed/Snares/OS_CES_Snare_2.wav
Converted and saved: data/parsed/Snares/ALC_Snare_05.wav
Converted and saved: data/parsed/Snares/SD_extra_Snare9.wav
Converted and saved: data/parsed/Snares/SD_extra_Snare2.wav
Converted and saved: data/parsed/Kicks/Kick_9_-_4A.wav
Converted and saved: data/parsed/Kicks/ALC_Kick_05.wav
Converted and saved: data/parsed/Kicks/Synth_Kick_4_-_4A_-_158.wav
No conversion needed for: data/parsed/Kicks/Diplo_Kicks__31__-_5A_-_158.wav
No conversion needed for: data/parsed/Kicks/Diplo_Kicks__13__-_2A_-_89.wav
Converted and saved: data/parsed/Kicks/ALC_Kick_03.wav
Converted and 