# WASD Data Loader

This notebook provides a clean reference workflow for loading:
- IQ binary captures (`.bin`)
- spectrogram tensors (`.npy`)
- anomaly labels (`.csv`)

Each loading cell first checks whether its input files exist and prints a short message instead of failing when the dataset has not been extracted yet.
        


In [None]:
import warnings
from pathlib import Path

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# Default figure size (width, height in inches) for plots that do not pass their own figsize.
plt.rcParams["figure.figsize"] = (10, 4)
        


## 1) Configure dataset root

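Update `DATASET_ROOT` to the directory where you extracted the dataset files. The IQ and spectrogram sub-folders are then located by trying the folder names used in different releases.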
        


In [None]:
# Directory that contains the extracted dataset; edit this to point at your copy.
DATASET_ROOT = Path(".")


def _first_existing(candidates):
    """Return the first path in ``candidates`` that exists, else the first entry."""
    for candidate in candidates:
        if candidate.exists():
            return candidate
    return candidates[0]


# Folder names differ between dataset releases, so every known spelling is probed.
IQ_CANDIDATES = [DATASET_ROOT / folder for folder in ("IQ_Data", "IQ data")]
NPY_CANDIDATES = [DATASET_ROOT / folder for folder in ("npy data", "NPY_Data")]

# When nothing exists yet, the first spelling is kept so later cells can still
# report a concrete "not found" path.
IQ_ROOT = _first_existing(IQ_CANDIDATES)
NPY_ROOT = _first_existing(NPY_CANDIDATES)

print("DATASET_ROOT:", DATASET_ROOT.resolve())
print("IQ_ROOT:", IQ_ROOT)
print("NPY_ROOT:", NPY_ROOT)
        


## 2) IQ loader (`int32` interleaved I/Q)


In [None]:
def load_iq_bin(path: Path, dtype=np.int32) -> np.ndarray:
    """Load an interleaved I/Q binary capture into a complex array.

    The file is read as a flat sequence ``[I0, Q0, I1, Q1, ...]`` of ``dtype``
    values. Even positions become the real part and odd positions the
    imaginary part of the result.

    Parameters
    ----------
    path : Path
        File to read; passed straight to ``np.fromfile``.
    dtype : data-type, optional
        Element type of the raw file. Defaults to ``np.int32``.

    Returns
    -------
    np.ndarray
        1-D ``complex128`` array holding ``raw.size // 2`` samples (empty for
        an empty file).

    Warns
    -----
    RuntimeWarning
        If the file holds an odd number of values. The unpaired trailing value
        is dropped, which usually indicates a truncated capture.
    """
    raw = np.fromfile(path, dtype=dtype)
    if raw.size % 2 != 0:
        warnings.warn(
            f"{path}: odd number of values ({raw.size}); "
            "dropping the unpaired trailing value",
            RuntimeWarning,
            stacklevel=2,
        )
        raw = raw[:-1]

    # Fill the real/imag views directly instead of computing `I + 1j * Q`:
    # complex multiplication turns a non-finite Q into a NaN real part, and the
    # expression form allocates several full-size temporaries.
    iq = np.empty(raw.size // 2, dtype=np.complex128)
    iq.real = raw[::2]
    iq.imag = raw[1::2]
    return iq


# Gather every capture below IQ_ROOT (recursively) in sorted order.
bin_files = sorted(IQ_ROOT.rglob("*.bin"))
print(f"Found {len(bin_files)} IQ .bin files")

if not bin_files:
    print("No IQ .bin files found. Set DATASET_ROOT to your extracted dataset path.")
else:
    # Only the first capture is loaded and previewed.
    iq_path = bin_files[0]
    iq = load_iq_bin(iq_path)
    print("Example file:", iq_path)
    print("Samples:", iq.shape[0])

    # Plot at most the first 5000 samples of each component.
    n = min(5000, iq.shape[0])
    fig, ax = plt.subplots()
    ax.plot(iq.real[:n], label="I")
    ax.plot(iq.imag[:n], label="Q", alpha=0.8)
    ax.set_title("IQ waveform preview")
    ax.set_xlabel("Sample index")
    ax.set_ylabel("Amplitude (raw ADC units)")
    ax.legend()
    ax.grid(True, alpha=0.3)
    plt.show()
        


## 3) Spectrogram loader (`.npy` memmap)


In [None]:
def load_spectrogram_npy(path: Path, input_size: int = 400, dtype=np.uint16) -> np.ndarray:
    """Memory-map a spectrogram file as a stack of square frames.

    Files that start with the NPY magic bytes are opened with
    ``np.load(..., mmap_mode="r")`` so the header is parsed rather than read as
    pixel data. Any other file is treated as a headerless raw dump of ``dtype``
    values and opened with ``np.memmap``.

    Parameters
    ----------
    path : Path
        Spectrogram file to open read-only.
    input_size : int, optional
        Side length of one square frame. Defaults to 400.
    dtype : data-type, optional
        Element type used for headerless files only. Defaults to ``np.uint16``.

    Returns
    -------
    np.ndarray
        Array view of shape ``(-1, input_size, input_size)``.

    Raises
    ------
    ValueError
        If the file holds no elements or the element count is not a multiple of
        ``input_size * input_size``.
    """
    with open(path, "rb") as fh:
        # b"\x93NUMPY" is the 6-byte magic prefix of the NPY container format.
        has_npy_header = fh.read(6) == b"\x93NUMPY"

    if has_npy_header:
        frames = np.load(path, mmap_mode="r")
    else:
        frames = np.memmap(path, dtype=dtype, mode="r")

    frame_elems = input_size * input_size
    if frames.size == 0 or frames.size % frame_elems != 0:
        raise ValueError(
            f"{path}: {frames.size} elements cannot be split into "
            f"{input_size}x{input_size} frames"
        )
    return frames.reshape(-1, input_size, input_size)


abnormal_npy = NPY_ROOT / "Abnormal" / "Abnormal_spectrogram.npy"

if abnormal_npy.exists():
    input_size = 400
    data = load_spectrogram_npy(abnormal_npy, input_size=input_size)

    print("Abnormal spectrogram shape:", data.shape)
    # Show only the first frame as a quick sanity check.
    plt.figure(figsize=(5, 5))
    plt.imshow(data[0], aspect="auto", origin="lower", cmap="viridis")
    plt.title("Sample abnormal spectrogram")
    plt.colorbar(label="Magnitude (uint16)")
    plt.tight_layout()
    plt.show()
else:
    print("Not found:", abnormal_npy)
        


## 4) Label loader (`Spectrum_label.csv`)


In [None]:
# Label table that ships alongside the abnormal spectrograms.
label_csv = NPY_ROOT / "Abnormal" / "label" / "Spectrum_label.csv"

if not label_csv.exists():
    print("Not found:", label_csv)
else:
    df = pd.read_csv(label_csv)
    print("Rows:", df.shape[0], "Columns:", df.shape[1])
    print(df.head())

    # The per-file count is only reported when a `filename` column is present.
    if "filename" in df:
        print("Unique files with labels:", df["filename"].nunique())
        


## 5) Next step

For anomaly waveform synthesis examples (tone/chirp/pulse), see:
`WASD_abnormal_signal_generation.ipynb`.
        
