In [1]:
import os
import librosa
import soundfile as sf
import numpy
print(numpy.__version__)

1.26.4


In [2]:
# Define source and destination paths
input_dir = '../data/raw/gunshot'
output_dir = '../data/preprocessed/gunshot'
file_prefix = 'gunshot_'

In [3]:
def preprocess_wav(src = '', dst = '', target_sr = 16000, file_num = 0):
    # Load file from file path, set sample rate to 16,000 HZ
    data, sr = librosa.load(src, sr = target_sr)

    # Ensure `data` is a NumPy array
    if not isinstance(data, (list, tuple)):
        data = data.astype(float)

    # Extract duration of the .wav file (dependent on sample rate)
    duration = librosa.get_duration(y = data, sr = target_sr)

    try:
        # Code to run if .wav is 2 seconds long
        if duration == 2.0:
            # Save new audio sample: 16,000 Hz, 2 seconds
            output_path = os.path.join(dst, f"{file_prefix}{file_num}.wav")
            sf.write(output_path, data, target_sr)

            print(f"Processed file {src} and saved to {os.path.join(dst, f"{file_prefix}{file_num}.wav")}")

        # Code to run if .wav is gr then 2 seconds long
        elif duration > 2.0:
            # Array to store audio segment partitions
            segments = []
            for i in range(0, int(duration) - 1, 1):

                # Only append data that is 2 seconds in duration
                if i + 2 <= int(duration):
                    segments.append((int(target_sr * i), int(target_sr * (i + 2))))

            # Generate new .wav for each sound sample
            for i, (start, end) in enumerate(segments):
                # Convert start and end to integers explicitly
                start, end = int(start), int(end)

                audio_segment = data[start:end]
                output_path = os.path.join(dst, f"{file_prefix}{file_num}_{i}.wav")
                sf.write(output_path, audio_segment, target_sr)

                print(f"Processed file {src} and saved to {os.path.join(dst, f"{file_prefix}{file_num}_{i}.wav")}")
        else:
            print(f"Skipping file {src} due to unexpected duration.")
    except Exception as error:
        print(f"Error processing file {src}: {error}")

In [5]:
count = 0

# Process all .wavs in input directory
for file in os.listdir(input_dir):
    if file.endswith(".wav"):
        file_path = os.path.join(input_dir, file)
        preprocess_wav(src = file_path, dst = output_dir, file_num = count)
        count += 1

print(f"Processed {count} files.")

Processed file ../data/raw/gunshot/Gunshot Audio 3.wav and saved to ../data/preprocessed/gunshot/gunshot_0.wav
Processed file ../data/raw/gunshot/Gunshot Audio 31.wav and saved to ../data/preprocessed/gunshot/gunshot_1.wav
Processed file ../data/raw/gunshot/Gunshot Audio 24.wav and saved to ../data/preprocessed/gunshot/gunshot_2.wav
Processed 3 files.
