# dEchorate HDF5 RIR Analysis

This notebook provides a robust interface to the dEchorate HDF5 database.
It loads metadata, maps IDs to HDF5 indices, and provides visualization tools for signal analysis (Waveform, Spectrogram, Energy Decay).

In [1]:
# 1. Imports
from pathlib import Path
import numpy as np
import pandas as pd
import h5py
import matplotlib.pyplot as plt
import scipy.signal

%matplotlib inline

In [None]:
# 2. Configuration
# Paths are relative to the notebook's working directory.
PROCESSED_METADATA_PATH = Path(
    "../../data/dEchorate/processed/dEchorate_database_cleaned.csv"
)
H5_PATH = Path("../../data/dEchorate/raw/dEchorate_rirs_gzip7.hdf5")
SAMPLING_RATE = 48000  # Hz; default `fs` for the time axis and spectrogram
ROOM_DIMS = np.array([6.0, 6.0, 2.4])  # meters

# Validation: fail fast with an explicit exception. `assert` statements are
# removed when Python runs with -O, which would silently skip these checks.
if not PROCESSED_METADATA_PATH.exists():
    raise FileNotFoundError(f"Metadata not found at {PROCESSED_METADATA_PATH}")
if not H5_PATH.exists():
    raise FileNotFoundError(f"HDF5 file not found at {H5_PATH}")

In [None]:
# 3. Load Metadata & ID Mapping
print("Loading metadata...")
df = pd.read_csv(PROCESSED_METADATA_PATH)

# Ensure IDs are integers for consistent mapping.
# Missing or non-numeric IDs become the sentinel -1 and are filtered out below.
for col in ["room_code", "src_id", "mic_id"]:
    df[col] = pd.to_numeric(df[col], errors="coerce").fillna(-1).astype(int)


def _sorted_valid_ids(column: str) -> list[int]:
    """Return the sorted, non-negative IDs found in ``df[column]`` as plain ints."""
    ids = df.loc[df[column] >= 0, column].unique()
    # Plain ints keep the printed lists readable (no ``np.int64(...)`` reprs);
    # they hash and compare equal to NumPy integers, so dict lookups still work.
    return sorted(int(value) for value in ids)


# Create Mappings (ID -> Index)
# Sorting the unique values makes the mapping deterministic.
# The HDF5 file is expected to hold (11 rooms, 6 sources, 30 mics, 48000 samples).
# NOTE(review): positions derived here only line up with the HDF5 axes if this
# CSV lists every room/source/mic stored in the file; a filtered CSV would shift
# the indices - confirm against the HDF5 layout.
unique_rooms = _sorted_valid_ids("room_code")
unique_srcs = _sorted_valid_ids("src_id")
unique_mics = _sorted_valid_ids("mic_id")

room_map = {id_: i for i, id_ in enumerate(unique_rooms)}
src_map = {id_: i for i, id_ in enumerate(unique_srcs)}
mic_map = {id_: i for i, id_ in enumerate(unique_mics)}

print(f"Found {len(unique_rooms)} Rooms: {unique_rooms}")
print(f"Found {len(unique_srcs)} Sources: {unique_srcs}")
print(f"Found {len(unique_mics)} Mics: {unique_mics}")


Loading metadata...
Found 2 Rooms: [np.int64(0), np.int64(1)]
Found 3 Sources: [np.int64(0), np.int64(1), np.int64(2)]
Found 30 Mics: [np.int64(0), np.int64(1), np.int64(2), np.int64(3), np.int64(4), np.int64(5), np.int64(6), np.int64(7), np.int64(8), np.int64(9), np.int64(10), np.int64(11), np.int64(12), np.int64(13), np.int64(14), np.int64(15), np.int64(16), np.int64(17), np.int64(18), np.int64(19), np.int64(20), np.int64(21), np.int64(22), np.int64(23), np.int64(24), np.int64(25), np.int64(26), np.int64(27), np.int64(28), np.int64(29)]


In [None]:
# 4. Define Data Loader
class DechorateLoader:
    def __init__(
        self, h5_path: str | Path, room_map: dict, src_map: dict, mic_map: dict
    ):
        self.h5_path = h5_path
        self.maps = {"room": room_map, "src": src_map, "mic": mic_map}
        self._validate_h5()

    def _validate_h5(self):
        # Check if HDF5 shape matches our mappings
        with h5py.File(self.h5_path, "r") as f:
            if "rir" not in f:
                raise KeyError("Dataset 'rir' not found in HDF5 file.")
            shape = f["rir"].shape
            print(f"Connected to HDF5. Dataset 'rir' shape: {shape}")

            # Expected: (n_rooms, n_srcs, n_mics, n_samples)
            expected = (
                len(self.maps["room"]),
                len(self.maps["src"]),
                len(self.maps["mic"]),
            )
            if shape[:3] != expected:
                print(
                    f"WARNING: HDF5 shape {shape[:3]} does not match Metadata counts {expected}. Mapping might be incorrect."
                )

    def get_rir(self, room_id, src_id, mic_id):
        """Retrieve a single RIR waveform."""
        r_idx = self.maps["room"].get(room_id)
        s_idx = self.maps["src"].get(src_id)
        m_idx = self.maps["mic"].get(mic_id)

        if any(x is None for x in [r_idx, s_idx, m_idx]):
            raise ValueError(
                f"Invalid ID combination: room={room_id}, src={src_id}, mic={mic_id}"
            )

        with h5py.File(self.h5_path, "r") as f:
            # Slice efficiently: [room, src, mic, :]
            rir = f["rir"][r_idx, s_idx, m_idx, :]
        return rir


# Build the shared loader; construction validates the HDF5 file immediately.
loader = DechorateLoader(
    h5_path=H5_PATH,
    room_map=room_map,
    src_map=src_map,
    mic_map=mic_map,
)

AttributeError: 'Group' object has no attribute 'shape'

In [None]:
# 5. Signal Processing & Visualization Tools
def compute_edc(rir):
    """Compute Energy Decay Curve using Schroeder Integration.

    Args:
        rir: 1-D array-like of impulse-response samples (any numeric dtype).

    Returns:
        float64 array of the same length: the backward-integrated energy,
        normalized to its maximum and expressed in dB. Empty input yields an
        empty array.
    """
    # Cast to float64 first: squaring integer samples (e.g. int16) overflows
    # silently, and plain Python lists do not support ``**``.
    samples = np.asarray(rir, dtype=np.float64)
    if samples.size == 0:
        # ``edc.max()`` would raise on a zero-size array.
        return samples

    # Standard backward integration: energy remaining from each sample onward.
    energy = samples**2
    edc = np.cumsum(energy[::-1])[::-1]
    # Normalize to dB; the epsilons avoid division by zero and log10(0).
    return 10 * np.log10(edc / (edc.max() + 1e-12) + 1e-12)


def plot_rir_analysis(rir, fs=SAMPLING_RATE):
    """Draw a three-panel summary of one impulse response and show it.

    Panels, top to bottom: the time-domain waveform, a spectrogram in dB, and
    the energy decay curve from ``compute_edc`` with the y-axis limited to
    -60..0 dB.

    Args:
        rir: 1-D array of impulse-response samples.
        fs: Sampling rate used to build the time axis and the spectrogram.
    """
    time_axis = np.arange(len(rir)) / fs
    t_end = time_axis.max()  # time of the last sample; shared x-limit

    fig, (ax_wave, ax_spec, ax_decay) = plt.subplots(
        3, 1, figsize=(10, 10), sharex=False
    )

    # Panel 1: waveform
    ax_wave.plot(time_axis, rir, color="tab:blue", lw=0.5)
    ax_wave.set_title("Impulse Response (Time Domain)")
    ax_wave.set_ylabel("Amplitude")
    ax_wave.set_xlim(0, t_end)
    ax_wave.grid(True, alpha=0.3)

    # Panel 2: spectrogram; the small offset keeps log10 finite for empty bins
    freqs, frame_times, power = scipy.signal.spectrogram(
        rir, fs, nperseg=256, noverlap=128
    )
    power_db = 10 * np.log10(power + 1e-12)
    mesh = ax_spec.pcolormesh(
        frame_times, freqs, power_db, shading="gouraud", cmap="inferno"
    )
    ax_spec.set_title("Spectrogram")
    ax_spec.set_ylabel("Frequency (Hz)")
    ax_spec.set_xlim(0, t_end)
    fig.colorbar(mesh, ax=ax_spec, label="Power (dB)")

    # Panel 3: energy decay curve
    decay_db = compute_edc(rir)
    ax_decay.plot(time_axis, decay_db, color="tab:red", lw=1.5)
    ax_decay.set_title("Energy Decay Curve (Schroeder Integration)")
    ax_decay.set_xlabel("Time (s)")
    ax_decay.set_ylabel("Energy (dB)")
    ax_decay.set_xlim(0, t_end)
    ax_decay.set_ylim(-60, 0)  # Standard T60 range
    ax_decay.grid(True, which="both", alpha=0.3)

    plt.tight_layout()
    plt.show()

## 6. Explore Data
Select a Room, Source, and Mic to visualize.

In [None]:
# Example Selection
# Each ID must be one of the values listed in the "Found ..." metadata printout.
target_room = 1  # Change this
target_src = 1  # Change this
target_mic = 5  # Change this

try:
    print(f"Fetching RIR for Room={target_room}, Src={target_src}, Mic={target_mic}...")
    rir_signal = loader.get_rir(target_room, target_src, target_mic)
except (ValueError, IndexError) as e:
    # Only lookup problems (unknown ID, index outside the stored array) are
    # reported inline; anything else propagates with its full traceback.
    print(f"Error: {e}")
else:
    # Plot outside the `try` so plotting bugs are not mistaken for lookup errors.
    plot_rir_analysis(rir_signal)

In [None]:
# 7. Bulk Statistics (Optional)
# Peak amplitude and mean power over one slice, as a quick check for
# clipping or silence.
print("Checking RIR statistics for first source in Room 0...")
with h5py.File(H5_PATH, "r") as h5_file:
    # Positional slice [room_idx=0, src_idx=0, all mics, all samples].
    # Positions 0/0 are used directly here instead of going through the ID maps.
    slice_data = h5_file["rir"][0, 0, :, :]

# The slice is already in memory, so the file can be closed before computing.
peak_amplitude = np.max(np.abs(slice_data))
mean_power = np.mean(slice_data**2)

print(f"Slice shape: {slice_data.shape}")
print(f"Max Amp: {peak_amplitude:.4f}")
print(f"Mean Power: {mean_power:.6f}")