In [15]:
import os
import librosa
import soundfile as sf
import numpy

# Numba requires NumPy 2.0.0 or older; print the installed version to confirm compatibility
print(numpy.__version__)

2.0.0


In [16]:
# Gunshot data: output file prefix plus the source and destination directories
file_prefix = "gunshot_"                      # prepended to every output file name
input_dir = "../data/raw/gunshot"             # directory the raw .wav files are listed from
output_dir = "../data/preprocessed/gunshot"   # directory the preprocessed clips are written to

In [22]:
def preprocess_wav(src = '', dst = '', target_sr = 16000, file_num = 0):
    """Resample one .wav file and save it as 2-second clip(s).

    The file is loaded with librosa at `target_sr`; what happens next
    depends on the duration of the loaded audio:

    * exactly 2 seconds: written unchanged as
      ``{file_prefix}{file_num}.wav``;
    * exactly 4 seconds: split into three overlapping 2-second windows
      (0-2 s, 1-3 s, 2-4 s), written as
      ``{file_prefix}{file_num}_{i}.wav`` with ``i`` = 0, 1, 2 so the
      windows do not overwrite one another;
    * any other duration: nothing is written and a "Skipping" message
      is printed.

    Parameters
    ----------
    src : str
        Path of the input .wav file.
    dst : str
        Directory the output file(s) are written into.
    target_sr : int
        Sample rate (Hz) used both for loading and for the written files.
    file_num : int
        Number embedded in the output file name(s).

    Returns
    -------
    None

    Notes
    -----
    Reads the module-level ``file_prefix``. Exceptions raised while
    slicing/writing are caught and printed; exceptions raised while
    loading the file or measuring its duration propagate to the caller.
    """
    # Load file from file path, resampled to `target_sr` (16,000 Hz by default)
    data, _ = librosa.load(src, sr = target_sr)

    # Ensure `data` is a float NumPy array regardless of the sequence type returned
    data = numpy.asarray(data, dtype = float)

    # Extract duration of the .wav file (dependent on sample rate)
    duration = librosa.get_duration(y = data, sr = target_sr)

    try:
        # NOTE(review): the duration checks are exact, so a clip that is even
        # slightly off 2 s / 4 s falls through to the "Skipping" branch.
        if duration == 2.0:
            # Already the target length: keep the whole signal as one clip
            clips = [(f"{file_prefix}{file_num}.wav", data)]

        elif duration == 4.0:
            # Segment into three 2-second windows with a 1-second hop:
            # 0 -> 2, 1 -> 3, 2 -> 4. Each window gets its own file name.
            clips = [
                (
                    f"{file_prefix}{file_num}_{i}.wav",
                    data[int(target_sr * i):int(target_sr * (i + 2))],
                )
                for i in range(3)
            ]

        # TODO: Potentially add edge case that can accept files of other durations
        else:
            print(f"Skipping file {src} due to unexpected duration.")
            # Nothing was written, so do not report the file as processed
            return

        # Save each clip: `target_sr` Hz, 2 seconds
        for file_name, samples in clips:
            output_path = os.path.join(dst, file_name)
            sf.write(output_path, samples, target_sr)
            print(f"Processed file {src} and saved to {output_path}")
    except Exception as error:
        print(f"Error processing file {src}: {error}")

In [23]:
# Make sure the destination directory exists before any clip is written
os.makedirs(output_dir, exist_ok = True)

count = 0

# Process all .wavs in input directory.
# - sorted(): os.listdir returns entries in arbitrary order; sorting keeps the
#   file -> number mapping reproducible between runs.
# - file_num = count: gives every input its own output name so later files do
#   not overwrite earlier ones.
for file in sorted(os.listdir(input_dir)):
    # Case-insensitive match so ".WAV" files are picked up as well
    if file.lower().endswith(".wav"):
        file_path = os.path.join(input_dir, file)
        preprocess_wav(file_path, output_dir, file_num = count)
        # Counts every .wav handed to preprocess_wav, including any it skips
        count += 1

print(f"Processed {count} files.")

Processed file ../data/raw/gunshot/Urban Sound 8k (3).wav and saved to ../data/preprocessed/gunshot/gunshot_0.wav
Processed 1 files.
