In [1]:
# Cel A — Extra imports en helpers
from pathlib import Path
import numpy as np
import pandas as pd
import librosa, librosa.display
import soundfile as sf
import matplotlib.pyplot as plt

import ipywidgets as widgets
from IPython.display import display, Audio
from functools import lru_cache

In [2]:
# Cel B — Bandpass filter (zoals door jou aangeleverd, met minimale aanpassing voor import)
from scipy.signal import butter, sosfiltfilt, sosfilt

def bandpass_filter(x, fs, fc=(50.0, 100.0), order=2, zero_phase=True, axis=-1):
    """
    Apply a Butterworth band-pass filter to a 1D/ND array.
    """
    low, high = fc
    if not (0 < low < high < fs/2):
        raise ValueError(f"Cutoffs must satisfy 0 < {low=} < {high=} < Nyquist ({fs/2}).")

    wn = (low/(fs/2), high/(fs/2))
    sos = butter(order, wn, btype='band', output='sos')

    x = np.asarray(x)
    if zero_phase:
        return sosfiltfilt(sos, x, axis=axis)
    else:
        return sosfilt(sos, x, axis=axis)

In [3]:
# Cel C — Snelle loader met caching (voorkomt 'blijft laden' bij veel wisselen)
@lru_cache(maxsize=256)
def load_audio_cached(path_str, mono=True):
    y, sr = librosa.load(path_str, sr=None, mono=mono)
    return y, sr

In [4]:
# Cel 2 — Bestanden vinden, metadata + labels uitlezen
from pathlib import Path
import pandas as pd
import soundfile as sf

# Root-map die zowel 'coughing' als 'heartsounds' bevat
data_root = Path("../data")  # <- pas aan indien nodig

VALID_TASKS = {"coughing", "heartsounds"}
VALID_MICS  = {"new_microphone": "new", "old_microphone": "old"}

def infer_labels_and_relpath(path: Path, root: Path):
    """Haal task- en mic-labels uit het pad. Geeft ook pad relatief t.o.v. root terug."""
    rel = path.relative_to(root)
    task = None
    mic  = None
    for part in rel.parts:
        if part in VALID_TASKS:
            task = part
        if part in VALID_MICS:
            mic = VALID_MICS[part]
    if task is None or mic is None:
        raise ValueError(f"Kan labels niet afleiden uit pad: {path}")
    return task, mic, str(rel)

# Vind alle .wav-bestanden binnen de 4 doelmappen
expected_dirs = [
    data_root / "coughing"    / "new_microphone",
    data_root / "coughing"    / "old_microphone",
    data_root / "heartsounds" / "new_microphone",
    data_root / "heartsounds" / "old_microphone",
]

wav_paths = []
for d in expected_dirs:
    if d.is_dir():
        wav_paths.extend(sorted(d.rglob("*.wav")))

if not wav_paths:
    raise FileNotFoundError(
        f"Geen .wav-bestanden gevonden in de verwachte mappen onder: {data_root.resolve()}\n"
        f"Verwacht: {', '.join(str(p) for p in expected_dirs)}"
    )

rows = []
for p in wav_paths:
    try:
        with sf.SoundFile(p) as f:
            sr = int(f.samplerate)
            n_frames = int(len(f))
            channels = int(f.channels)
            duration = n_frames / sr
    except Exception as e:
        # Sla corrupte / onleesbare bestanden over maar ga door
        print(f"Waarschuwing: {p} kon niet gelezen worden ({e}). Bestand overgeslagen.")
        continue

    task, mic, rel = infer_labels_and_relpath(p, data_root)

    rows.append({
        "path": str(p),
        "name": p.name,
        "relative_to_data": rel,
        "sr": sr,
        "channels": channels,
        "n_samples": n_frames,
        "duration_sec": duration,
        # labels
        "label_task": task,       # 'coughing' of 'heartsounds'
        "label_mic": mic,         # 'new' of 'old'
    })

df = pd.DataFrame(rows).sort_values(["label_task", "label_mic", "name"]).reset_index(drop=True)

In [5]:
df.head(10)

Unnamed: 0,path,name,relative_to_data,sr,channels,n_samples,duration_sec,label_task,label_mic
0,..\data\coughing\new_microphone\cough_1.wav,cough_1.wav,coughing\new_microphone\cough_1.wav,15750,1,468992,29.77727,coughing,new
1,..\data\coughing\new_microphone\cough_2.wav,cough_2.wav,coughing\new_microphone\cough_2.wav,15750,1,468992,29.77727,coughing,new
2,..\data\coughing\new_microphone\cough_3.wav,cough_3.wav,coughing\new_microphone\cough_3.wav,15750,1,468992,29.77727,coughing,new
3,..\data\coughing\new_microphone\cough_4.wav,cough_4.wav,coughing\new_microphone\cough_4.wav,15750,1,468992,29.77727,coughing,new
4,..\data\coughing\new_microphone\cough_5.wav,cough_5.wav,coughing\new_microphone\cough_5.wav,15750,1,468992,29.77727,coughing,new
5,..\data\coughing\new_microphone\normal_1.wav,normal_1.wav,coughing\new_microphone\normal_1.wav,15750,1,468992,29.77727,coughing,new
6,..\data\coughing\new_microphone\normal_2.wav,normal_2.wav,coughing\new_microphone\normal_2.wav,15750,1,468992,29.77727,coughing,new
7,..\data\coughing\new_microphone\normal_3.wav,normal_3.wav,coughing\new_microphone\normal_3.wav,15750,1,468992,29.77727,coughing,new
8,..\data\coughing\new_microphone\normal_4.wav,normal_4.wav,coughing\new_microphone\normal_4.wav,15750,1,468992,29.77727,coughing,new
9,..\data\coughing\new_microphone\normal_5.wav,normal_5.wav,coughing\new_microphone\normal_5.wav,15750,1,468992,29.77727,coughing,new


In [None]:
# Cel D — Interactieve visualisatie + audio-terugluisteren
# STFT parameters
n_fft = 1024
hop_length = 256
win = "hann"

# UI controls
idx_slider   = widgets.IntSlider(value=0, min=0, max=len(df)-1, step=1, description='Index:')
use_filter   = widgets.Checkbox(value=False, description='Bandpass aan')
low_box      = widgets.FloatText(value=50.0, description='Low (Hz):', step=1.0)
high_box     = widgets.FloatText(value=2000.0, description='High (Hz):', step=1.0)
order_slider = widgets.IntSlider(value=2, min=1, max=10, step=1, description='Order:')
zero_chk     = widgets.Checkbox(value=True, description='Zero-phase')

ui_left  = widgets.VBox([idx_slider, use_filter, low_box, high_box, order_slider, zero_chk])
out_area = widgets.Output()

def _update(idx, use_filter, low, high, order, zero_phase):
    with out_area:
        out_area.clear_output(wait=True)

        # Bestand kiezen + labels ophalen
        row = df.iloc[idx]
        wav_file = row["path"]
        task_lbl = row.get("label_task", "unknown")
        mic_lbl  = row.get("label_mic", "unknown")
        title_suffix = f"[{task_lbl} | {mic_lbl}]"

        # Audio laden (cached)
        y, sr = load_audio_cached(wav_file)

        # Optioneel filteren voor afspelen + plotten
        y_view = y
        if use_filter:
            high_eff = min(float(high), sr/2 - 1.0)
            y_view = bandpass_filter(y_view, fs=sr, fc=(float(low), high_eff),
                                     order=int(order), zero_phase=bool(zero_phase))
            fmax_plot = high_eff
        else:
            fmax_plot = sr / 2.0

        # Waveform
        plt.figure(figsize=(12, 3))
        librosa.display.waveshow(y_view, sr=sr)
        plt.title(f"Waveform — {Path(wav_file).name} {title_suffix}")
        plt.xlabel("Tijd (s)")
        plt.ylabel("Amplitude")
        plt.tight_layout()
        plt.show()

        # STFT
        S = librosa.stft(y_view, n_fft=n_fft, hop_length=hop_length,
                         win_length=n_fft, window=win, center=True)
        S_db = librosa.amplitude_to_db(np.abs(S), ref=np.max)
        times = librosa.frames_to_time(np.arange(S_db.shape[1]), sr=sr,
                                       hop_length=hop_length, n_fft=n_fft)
        freqs = librosa.fft_frequencies(sr=sr, n_fft=n_fft)

        mask = freqs <= fmax_plot
        if not np.any(mask):
            mask = freqs <= (sr/2.0)
        freqs_plot = freqs[mask]
        S_db_plot  = S_db[mask, :]

        plt.figure(figsize=(12, 4))
        plt.pcolormesh(times, freqs_plot, S_db_plot, shading="auto")
        plt.title(f"STFT — {Path(wav_file).name} {title_suffix}")
        plt.xlabel("Tijd (s)")
        plt.ylabel("Frequentie (Hz)")
        cbar = plt.colorbar()
        cbar.set_label("Amplitude (dB)")
        plt.ylim(0.0, fmax_plot)
        plt.tight_layout()
        plt.show()

        # Audio player (volledige clip)
        display(Audio(y_view, rate=sr))

# Koppel controls aan output zonder de UI telkens te hercreëren (stabieler dan interact())
controls = {
    'idx': idx_slider,
    'use_filter': use_filter,
    'low': low_box,
    'high': high_box,
    'order': order_slider,
    'zero_phase': zero_chk,
}
linked_out = widgets.interactive_output(_update, controls)

display(widgets.HBox([ui_left, out_area]))
# Initiele render
_update(idx_slider.value, use_filter.value, low_box.value, high_box.value, order_slider.value, zero_chk.value)

HBox(children=(VBox(children=(IntSlider(value=0, description='Index:', max=39), Checkbox(value=False, descript…

In [7]:
# Cel E — Subset genereren: gefilterde .wav-bestanden wegschrijven + nieuw DataFrame
def make_filtered_subset(df_in, data_dir: Path, fc=(50.0, 2000.0), order=2, zero_phase=True, out_root=None):
    """
    Maakt een nieuwe map met gefilterde .wav's en geeft een DataFrame met paden terug.
    """
    tag = f"bp_{int(fc[0])}_{int(fc[1])}_o{order}{'_zp' if zero_phase else ''}"
    out_dir = Path(out_root) if out_root else (data_dir / f"subset_{tag}")
    out_dir.mkdir(parents=True, exist_ok=True)

    rows = []
    for _, row in df_in.iterrows():
        src = Path(row["path"])
        y, sr = load_audio_cached(str(src))
        y_f = bandpass_filter(y, fs=sr, fc=fc, order=order, zero_phase=zero_phase)

        dst = out_dir / (src.stem + f"_{tag}.wav")
        sf.write(dst, y_f, sr)

        rows.append({
            "path": str(dst),
            "name": dst.name,
            "relative_to_subset": str(dst.relative_to(out_dir)),
            "sr": sr,
            "channels": 1,  # we laden mono
            "n_samples": len(y_f),
            "duration_sec": len(y_f)/sr,
            "source_path": str(src)
        })

    df_out = pd.DataFrame(rows)
    return df_out, out_dir

In [None]:
# Voorbeeld aanroep:
data_dir = Path("../data/coughing")
df_filtered, subset_dir = make_filtered_subset(df, data_dir, fc=(50.0, 2000.0), order=2, zero_phase=True)
df_filtered.head()

Unnamed: 0,path,name,relative_to_subset,sr,channels,n_samples,duration_sec,source_path
0,..\data\coughing\subset_bp_50_2000_o2_zp\cough...,cough_1_bp_50_2000_o2_zp.wav,cough_1_bp_50_2000_o2_zp.wav,15750,1,468992,29.77727,..\data\coughing\new_microphone\cough_1.wav
1,..\data\coughing\subset_bp_50_2000_o2_zp\cough...,cough_2_bp_50_2000_o2_zp.wav,cough_2_bp_50_2000_o2_zp.wav,15750,1,468992,29.77727,..\data\coughing\new_microphone\cough_2.wav
2,..\data\coughing\subset_bp_50_2000_o2_zp\cough...,cough_3_bp_50_2000_o2_zp.wav,cough_3_bp_50_2000_o2_zp.wav,15750,1,468992,29.77727,..\data\coughing\new_microphone\cough_3.wav
3,..\data\coughing\subset_bp_50_2000_o2_zp\cough...,cough_4_bp_50_2000_o2_zp.wav,cough_4_bp_50_2000_o2_zp.wav,15750,1,468992,29.77727,..\data\coughing\new_microphone\cough_4.wav
4,..\data\coughing\subset_bp_50_2000_o2_zp\cough...,cough_5_bp_50_2000_o2_zp.wav,cough_5_bp_50_2000_o2_zp.wav,15750,1,468992,29.77727,..\data\coughing\new_microphone\cough_5.wav
