In [3]:
import os
import torch
import torchaudio
from torchaudio.transforms import Resample

# Fixed-point quantisation helper
def to_fixed_point(tensor, int_bits, frac_bits):
    """Snap *tensor* onto a signed fixed-point grid and return it as real values.

    Values are rounded to the nearest multiple of ``2 ** -frac_bits`` and
    saturated to the range
    ``[-2 ** (int_bits - 1), 2 ** (int_bits - 1) - 2 ** -frac_bits]``,
    so ``int_bits`` effectively includes the sign bit.

    Args:
        tensor: Tensor of values to quantise; the whole tensor is processed
            in one vectorised pass.
        int_bits: Number of integer bits (sign included) of the target format.
        frac_bits: Number of fractional bits of the target format.

    Returns:
        A tensor holding the quantised values, scaled back to the original
        units (i.e. not the raw integer codes).
    """
    steps_per_unit = 2 ** frac_bits
    lowest = -(2 ** (int_bits - 1))
    # Largest representable value sits one quantisation step below 2**(int_bits-1).
    highest = (2 ** (int_bits - 1)) - 1 / steps_per_unit

    # Work in units of quantisation steps: round to a whole step, then saturate.
    codes = torch.clamp(
        torch.round(tensor * steps_per_unit),
        lowest * steps_per_unit,
        highest * steps_per_unit,
    )
    return codes / steps_per_unit

def preprocess_and_save(file_path, target_sample_rate, int_bits, frac_bits, raw_root, preprocessed_root):
    """Resample one audio file, quantise it, and write it to the mirrored output tree.

    Args:
        file_path: Path of the audio file to read with ``torchaudio.load``.
        target_sample_rate: Sample rate the output is written with; the audio
            is resampled only when the file's own rate differs.
        int_bits: Integer-bit count forwarded to ``to_fixed_point``.
        frac_bits: Fractional-bit count forwarded to ``to_fixed_point``.
        raw_root: Root of the input tree; ``file_path`` is made relative to it.
        preprocessed_root: Root of the output tree; the relative path is
            re-created beneath it (missing directories are created).

    Returns:
        None. The result is written to disk via ``torchaudio.save``.
    """
    audio, source_rate = torchaudio.load(file_path)
    if source_rate != target_sample_rate:
        audio = Resample(orig_freq=source_rate, new_freq=target_sample_rate)(audio)

    quantised = to_fixed_point(audio, int_bits, frac_bits)

    # Keep the file at the same position relative to the root in the output tree.
    destination = os.path.join(
        preprocessed_root,
        os.path.relpath(file_path, raw_root),
    )
    os.makedirs(os.path.dirname(destination), exist_ok=True)

    torchaudio.save(destination, quantised, target_sample_rate)

# Walk the raw tree and mirror its folder structure in the output
def preprocess_dataset(raw_root, preprocessed_root, target_sample_rate, int_bits, frac_bits):
    """Preprocess every ``.wav`` / ``.flac`` file found anywhere under *raw_root*.

    The directory tree is walked recursively; each file whose name ends in
    ``.wav`` or ``.flac`` (case-insensitive) is handed to
    ``preprocess_and_save``. Other files are ignored.

    Args:
        raw_root: Root directory of the input dataset.
        preprocessed_root: Root directory that receives the processed files.
        target_sample_rate: Forwarded to ``preprocess_and_save``.
        int_bits: Forwarded to ``preprocess_and_save``.
        frac_bits: Forwarded to ``preprocess_and_save``.

    Returns:
        None.
    """
    audio_suffixes = ('.wav', '.flac')
    for folder, _subdirs, names in os.walk(raw_root):
        for name in names:
            if not name.lower().endswith(audio_suffixes):
                continue  # not an audio file we handle
            preprocess_and_save(
                os.path.join(folder, name),
                target_sample_rate,
                int_bits,
                frac_bits,
                raw_root,
                preprocessed_root,
            )

# Settings shared by every dataset variant processed below.
target_sample_rate = 8000
int_bits = 4
frac_bits = 10

# Process the 2-, 3- and 4-speaker datasets in turn; output goes to a
# relative "<N>Speakers5KHalfPreprocessed" directory.
for NUM_SOURCES in range(2, 5):
    raw_data_root = f'G:\\Jupyter\\SoundMixer\\{NUM_SOURCES}Speakers5KHalf'
    preprocessed_data_root = f'{NUM_SOURCES}Speakers5KHalfPreprocessed'

    preprocess_dataset(
        raw_root=raw_data_root,
        preprocessed_root=preprocessed_data_root,
        target_sample_rate=target_sample_rate,
        int_bits=int_bits,
        frac_bits=frac_bits,
    )
