In [None]:
# Folder that is scanned (non-recursively, via os.listdir) for WAV recordings.
input_folder = "./input_recordings/"  # Specify the input folder path
# CSV file receiving the results: one row per file, channel and frequency bin.
output_csv = "noise_profiles.csv"


In [None]:
import os
import numpy as np
import pandas as pd
import librosa
from pydub import AudioSegment

In [None]:
# high-pass filter
def highpass_filter(y, sr, cutoff=250, order=1):
    """
    Run a digital Butterworth high-pass filter over a signal.

    Parameters
    ----------
    y : array-like
        Samples to filter.
    sr : int or float
        Sampling rate of ``y`` in Hz; the Nyquist frequency is taken as ``sr / 2``.
    cutoff : float, optional
        Cutoff frequency in Hz (default 250).
    order : int, optional
        Order of the Butterworth filter (default 1).

    Returns
    -------
    numpy.ndarray
        The filtered samples as returned by ``scipy.signal.lfilter``.
    """
    from scipy import signal

    # scipy wants the critical frequency expressed as a fraction of Nyquist.
    wn = cutoff / (0.5 * sr)
    numerator, denominator = signal.butter(order, wn, btype='high', analog=False)
    return signal.lfilter(numerator, denominator, y)


In [None]:
def compute_noise_profile(chunk, n_fft=1024, hop_length=8000):
    """
    Creates a noise profile from one channel of audio data.

    The samples are scaled to floats by the integer full-scale value,
    high-pass filtered with ``highpass_filter`` (using that function's default
    cutoff and order) and converted to a magnitude spectrogram. The profile
    is the median magnitude of every frequency bin across all frames.

    Parameters
    ----------
    chunk : object
        Audio segment exposing ``get_array_of_samples()`` and ``frame_rate``;
        ``process_wav_file`` passes the mono channels of a pydub
        ``AudioSegment``.
    n_fft : int, optional
        FFT window size handed to ``librosa.stft`` (default 1024).
    hop_length : int, optional
        Number of samples between successive STFT frames (default 8000).

    Returns
    -------
    numpy.ndarray
        1-D array holding one median magnitude per STFT frequency bin.

    Raises
    ------
    ValueError
        If the sample array typecode is neither ``'h'`` nor ``'i'``.
    """
    samples = chunk.get_array_of_samples()
    typecode = samples.typecode  # array.array typecode of the raw samples

    # Divisor that maps each supported integer format to roughly [-1, 1).
    full_scale = {
        'h': 2**15,  # 16-bit integer
        'i': 2**31,  # 32-bit integer
    }
    # Reject unsupported formats before allocating the float copy.
    if typecode not in full_scale:
        raise ValueError(f"Unsupported data format: {typecode}")

    # Always work in float32, normalised by the format's full-scale value.
    y = np.array(samples, dtype=np.float32) / full_scale[typecode]

    sr = chunk.frame_rate

    # High-pass filter with highpass_filter's default cutoff and order.
    y = highpass_filter(y, sr)

    # Magnitude spectrogram.
    # NOTE(review): with the defaults the hop (8000) is larger than the window
    # (1024), so frames do not overlap and the samples between windows are
    # never analysed - confirm this sub-sampling is intended.
    S = np.abs(librosa.stft(y, n_fft=n_fft, hop_length=hop_length))

    # Noise profile: per-bin median over the time (frame) axis.
    return np.median(S, axis=1)



In [None]:



def process_wav_file(file_path):
    """
    Load an audio file and build a noise profile for each of its channels.

    Returns a list with one ``(file name, channel number, noise profile)``
    tuple per channel; the file name is the basename of ``file_path`` and
    channel numbers start at 1.
    """
    base_name = os.path.basename(file_path)
    mono_channels = AudioSegment.from_file(file_path).split_to_mono()
    return [
        (base_name, channel_number, compute_noise_profile(mono))
        for channel_number, mono in enumerate(mono_channels, start=1)
    ]



In [None]:

# Collect one record per (file, channel, frequency bin), then write the CSV once.
output_data = []
# Keep `profiles` defined even when no recording matches, so a later cell that
# displays it does not fail with NameError on a fresh kernel.
profiles = []

# Sorted so the CSV row order is reproducible (os.listdir order is arbitrary).
for file_name in sorted(os.listdir(input_folder)):
    # Case-insensitive match so ".WAV" recordings are picked up as well.
    if not file_name.lower().endswith(".wav"):
        continue

    file_path = os.path.join(input_folder, file_name)
    print(f"Processing: {file_path}")
    profiles = process_wav_file(file_path)

    # Distinct name for the tuple field so the outer loop variable is not shadowed.
    for profile_name, channel, profile in profiles:
        print(f"{profile_name} {channel}")
        output_data.extend(
            {"filename": profile_name, "channel": channel, "frequency_bin": idx, "value": value}
            for idx, value in enumerate(profile)
        )

# Build the DataFrame and save it once, after all files are processed, instead
# of rewriting the whole CSV after every channel. Explicit columns keep the
# header present even when no recording was found.
output_df = pd.DataFrame(output_data, columns=["filename", "channel", "frequency_bin", "value"])
output_df.to_csv(output_csv, index=False)
print(f"Noise profiles saved: {output_csv}")



In [None]:
# Inspect the profiles returned for the most recently processed recording.
profiles
