Outputs (overwritten after each compilation):
- `experiment/resources/audio/con_run1_*.wav`
- `experiment/resources/audio/abs_run1_*.wav`
- `experiment/resources/audio/con_run2_*.wav`
- `experiment/resources/audio/abs_run2_*.wav`
- `experiment/resources/audio/base_run1.wav`
- `experiment/resources/audio/base_run2.wav`


In [1]:
from __future__ import annotations

import os
import shutil
import subprocess
from pathlib import Path
import librosa

import numpy as np
import pandas as pd


In [2]:
# Paths (repo layout)
THIS_DIR = Path.cwd()                # stimuli_selection/scripts/
ROOT = THIS_DIR.parents[1]           # repo root

LISTS_DIR = ROOT / "experiment" / "resources" / "lists"
AUDIO_DIR = ROOT / "experiment" / "resources" / "audio"
AUDIO_DIR.mkdir(parents=True, exist_ok=True)

STIMSEL = ROOT / "stimuli_selection"
MODELS_DIR = STIMSEL / "models" / "piper"
MODELS_DIR.mkdir(parents=True, exist_ok=True)

LISTS_DIR, AUDIO_DIR, MODELS_DIR


(WindowsPath('c:/Users/kinga/Documents/Blindbrain/4. Courses/fMRI - design of the experiment and data analysis/cognes-auditory-1back-pilot/experiment/resources/lists'),
 WindowsPath('c:/Users/kinga/Documents/Blindbrain/4. Courses/fMRI - design of the experiment and data analysis/cognes-auditory-1back-pilot/experiment/resources/audio'),
 WindowsPath('c:/Users/kinga/Documents/Blindbrain/4. Courses/fMRI - design of the experiment and data analysis/cognes-auditory-1back-pilot/stimuli_selection/models/piper'))

In [3]:
# Read lists from 01_ notebook outputs
files = [
    "con_run1.csv","abs_run1.csv","base_run1.csv",
    "con_run2.csv","abs_run2.csv","base_run2.csv",
]
dfs = {f: pd.read_csv(LISTS_DIR / f) for f in files}
{k: v.shape for k,v in dfs.items()}


{'con_run1.csv': (24, 4),
 'abs_run1.csv': (24, 4),
 'base_run1.csv': (24, 4),
 'con_run2.csv': (24, 4),
 'abs_run2.csv': (24, 4),
 'base_run2.csv': (24, 4)}

In [4]:
# Piper TTS setup (offline-friendly)
# - If you have internet: the cell below can download a Polish model + config from HuggingFace.
# - If you are offline: put the two files manually into MODELS_DIR and set MODEL_PATH/CONFIG_PATH.

VOICE = "pl_PL-gosia-medium"  # good, widely used
MODEL_PATH = MODELS_DIR / f"{VOICE}.onnx"
CONFIG_PATH = MODELS_DIR / f"{VOICE}.onnx.json"

MODEL_URL = "https://huggingface.co/rhasspy/piper-voices/resolve/v1.0.0/pl/pl_PL/gosia/medium/pl_PL-gosia-medium.onnx?download=true"
CONFIG_URL = "https://huggingface.co/rhasspy/piper-voices/resolve/v1.0.0/pl/pl_PL/gosia/medium/pl_PL-gosia-medium.onnx.json?download=true"

MODEL_PATH, CONFIG_PATH


(WindowsPath('c:/Users/kinga/Documents/Blindbrain/4. Courses/fMRI - design of the experiment and data analysis/cognes-auditory-1back-pilot/stimuli_selection/models/piper/pl_PL-gosia-medium.onnx'),
 WindowsPath('c:/Users/kinga/Documents/Blindbrain/4. Courses/fMRI - design of the experiment and data analysis/cognes-auditory-1back-pilot/stimuli_selection/models/piper/pl_PL-gosia-medium.onnx.json'))

In [5]:
def ensure_piper_installed():
    # Prefer CLI if available, else try python package.
    for exe in ["piper", "piper.exe", "piper-tts", "piper-tts.exe"]:
        if shutil.which(exe):
            return "cli", exe
    try:
        import piper  # noqa: F401
        return "py", None
    except Exception:
        return None, None

backend, cli_exe = ensure_piper_installed()
backend, cli_exe


('cli', 'piper')

In [6]:
# Optional: download the model (requires internet)
# If this fails due to network, download manually later.
def download_file(url: str, out_path: Path):
    import requests
    out_path.parent.mkdir(parents=True, exist_ok=True)
    with requests.get(url, stream=True, timeout=120) as r:
        r.raise_for_status()
        with open(out_path, "wb") as f:
            for chunk in r.iter_content(chunk_size=1024*1024):
                if chunk:
                    f.write(chunk)

if not MODEL_PATH.exists() or not CONFIG_PATH.exists():
    print("Downloading Piper model/config ...")
    try:
        download_file(MODEL_URL, MODEL_PATH)
        download_file(CONFIG_URL, CONFIG_PATH)
    except Exception as e:
        print("Download failed:", repr(e))
        print("If offline: manually place the model+json into:", MODELS_DIR)

MODEL_PATH.exists(), CONFIG_PATH.exists()


(True, True)

In [7]:
# Audio I/O helpers
def read_wav(path: Path):
    import soundfile as sf
    x, sr = sf.read(path, dtype="float32")
    if x.ndim > 1:
        x = x.mean(axis=1)
    return x, sr

def write_wav(path: Path, x: np.ndarray, sr: int):
    import soundfile as sf
    path.parent.mkdir(parents=True, exist_ok=True)
    sf.write(path, x.astype(np.float32), sr, subtype="PCM_16")

def pad_or_trim(x: np.ndarray, target_len: int):
    if len(x) == target_len:
        return x
    if len(x) > target_len:
        return x[:target_len]
    pad = np.zeros(target_len - len(x), dtype=x.dtype)
    return np.concatenate([x, pad])

def prepend_silence(x: np.ndarray, sr: int, silence_s: float = 0.25) -> np.ndarray:
    n_silence = int(round(silence_s * sr))
    silence = np.zeros(n_silence, dtype=x.dtype)
    return np.concatenate([silence, x])

def slow_down(x: np.ndarray, rate: float = 0.5) -> np.ndarray:
    # rate < 1.0 = slower
    return librosa.effects.time_stretch(x, rate=rate)

# Try to ensure soundfile is present
try:
    import soundfile as sf  # noqa
except Exception as e:
    raise RuntimeError("Missing dependency: soundfile. Install it in your notebook env: pip install soundfile") from e


In [8]:
# TTS synthesis (Piper)
def synth_piper_cli(text: str, out_wav: Path, model: Path, config: Path, exe: str):
    # Piper CLI reads text from stdin
    cmd = [exe, "--model", str(model), "--config", str(config), "--output_file", str(out_wav)]
    p = subprocess.run(cmd, input=text.encode("utf-8"), stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    if p.returncode != 0:
        raise RuntimeError(f"Piper CLI failed: {p.stderr.decode('utf-8', errors='ignore')[:500]}")

def synth_piper_py(text: str, out_wav: Path, model: Path, config: Path):
    # Python API may vary by version; try common entrypoints.
    try:
        from piper.voice import PiperVoice
        voice = PiperVoice.load(str(model), str(config))
        voice.synthesize(text, str(out_wav))
        return
    except Exception:
        pass
    try:
        from piper import PiperVoice  # type: ignore
        voice = PiperVoice.load(str(model), str(config))
        voice.synthesize(text, str(out_wav))
        return
    except Exception as e:
        raise RuntimeError("Piper Python API not available. Install CLI or use the CLI backend.") from e

def synth(text: str, out_wav: Path):
    if not MODEL_PATH.exists() or not CONFIG_PATH.exists():
        raise FileNotFoundError("Missing Piper model/config. Put them into MODELS_DIR or run the download cell.")
    if backend == "cli":
        synth_piper_cli(text, out_wav, MODEL_PATH, CONFIG_PATH, cli_exe)
    elif backend == "py":
        synth_piper_py(text, out_wav, MODEL_PATH, CONFIG_PATH)
    else:
        raise RuntimeError("Piper not installed. Easiest: pip install piper (or install CLI).")

# quick smoke-test (comment out if you want)
# test_path = AUDIO_DIR / "_tts_test.wav"
# synth("To jest test.", test_path)
# test_path.exists()


In [9]:
# Collect all target (word, file) pairs from the list CSVs
def collect_pairs(df: pd.DataFrame) -> list[tuple[str, Path]]:
    pairs = []
    for _, row in df.iterrows():
        stim = str(row.get("stimFile", "")).strip()
        word = str(row.get("word", "")).strip()
        if not stim or stim.lower() == "nan":
            continue
        # stimFile is relative to experiment/, e.g. "resources/audio/xxx.wav"
        out = ROOT / "experiment" / stim.replace("\\", "/")
        # Skip BASE here (handled separately)
        if str(row.get("condition","")).upper() == "BASE":
            continue
        pairs.append((word, out))
    # unique by output path (in case of repeats)
    seen = set()
    uniq = []
    for w,p in pairs:
        if p in seen:
            continue
        seen.add(p)
        uniq.append((w,p))
    return uniq

pairs = []
pairs += collect_pairs(dfs["con_run1.csv"])
pairs += collect_pairs(dfs["abs_run1.csv"])
pairs += collect_pairs(dfs["con_run2.csv"])
pairs += collect_pairs(dfs["abs_run2.csv"])

len(pairs), pairs[:3]


(96,
 [('rakieta',
   WindowsPath('c:/Users/kinga/Documents/Blindbrain/4. Courses/fMRI - design of the experiment and data analysis/cognes-auditory-1back-pilot/experiment/resources/audio/con_run1_001.wav')),
  ('prysznic',
   WindowsPath('c:/Users/kinga/Documents/Blindbrain/4. Courses/fMRI - design of the experiment and data analysis/cognes-auditory-1back-pilot/experiment/resources/audio/con_run1_002.wav')),
  ('owca',
   WindowsPath('c:/Users/kinga/Documents/Blindbrain/4. Courses/fMRI - design of the experiment and data analysis/cognes-auditory-1back-pilot/experiment/resources/audio/con_run1_003.wav'))])

In [11]:
# Pass 1: synthesize to temp, measure durations, choose a common target duration.
TMP_DIR = AUDIO_DIR / "_tmp_raw"
TMP_DIR.mkdir(parents=True, exist_ok=True)

raw_paths = []
durations = []

for i, (word, out_path) in enumerate(pairs, start=1):
    tmp = TMP_DIR / out_path.name
    synth(word, tmp)  # overwrite temp
    x, sr = read_wav(tmp)

    # silence
    x = slow_down(x, rate=2)
    x = prepend_silence(x, sr, silence_s=0.25)
    raw_paths.append((tmp, out_path))
    durations.append(len(x) / sr)

durations = np.array(durations, dtype=float)
durations.min(), np.median(durations), durations.max(), len(durations)


(np.float64(0.42993197278911566),
 np.float64(0.5779591836734694),
 np.float64(0.8072562358276644),
 96)

In [12]:
# Choose target duration suggested by the data (snap to 0.25s grid)
GRID = np.array([0.75, 1.00, 1.25, 1.50], dtype=float)  # extend if needed
median_d = float(np.median(durations))
target_d = float(GRID[np.argmin(np.abs(GRID - median_d))])

SR_TARGET = 22050  # gosia model card uses 22050 Hz
TARGET_SAMPLES = int(round(target_d * SR_TARGET))

median_d, target_d, TARGET_SAMPLES


(0.5779591836734694, 0.75, 16538)

In [13]:
# Pass 2: pad/trim to common duration, write to final locations (overwrite)
for tmp, out_path in raw_paths:
    x, sr = read_wav(tmp)

    # slow down + silence
    x = slow_down(x, rate=0.5)
    x = prepend_silence(x, sr, silence_s=0.25)
    if sr != SR_TARGET:
        # resample if needed
        x = librosa.resample(x, orig_sr=sr, target_sr=SR_TARGET)
        sr = SR_TARGET
    x2 = pad_or_trim(x, TARGET_SAMPLES)
    write_wav(out_path, x2, sr)

# cleanup temp
for tmp, _ in raw_paths:
    try:
        tmp.unlink()
    except Exception:
        pass
try:
    TMP_DIR.rmdir()
except Exception:
    pass

"AUDIO_WRITTEN"


'AUDIO_WRITTEN'

In [14]:
# BASELINE: noise-vocoded speech (standard degraded-speech baseline)
# We generate a short phrase from selected words, synthesize it, vocode it, and crop/pad to target duration.

def noise_vocode(x: np.ndarray, sr: int, n_bands: int = 6, seed: int = 0) -> np.ndarray:
    from scipy.signal import butter, filtfilt, hilbert

    rng_local = np.random.default_rng(seed)

    x = x.astype(np.float32)
    x = x / (np.max(np.abs(x)) + 1e-8)

    # log-spaced bands
    f_lo, f_hi = 80.0, min(8000.0, sr / 2 - 100.0)
    edges = np.logspace(np.log10(f_lo), np.log10(f_hi), n_bands + 1)

    y = np.zeros_like(x)
    for b in range(n_bands):
        low, high = edges[b], edges[b + 1]

        sos_b, sos_a = butter(
            4,
            [low / (sr / 2), high / (sr / 2)],
            btype="bandpass",
        )

        band = filtfilt(sos_b, sos_a, x)
        env = np.abs(hilbert(band))

        noise = rng_local.standard_normal(len(x)).astype(np.float32)
        noise_band = filtfilt(sos_b, sos_a, noise)

        y += env * noise_band

    y = y / (np.max(np.abs(y)) + 1e-8)
    return y


def make_base(run: int, out_name: str, n_words: int = 6, n_bands: int = 6, seed: int = 1234):
    rng_local = np.random.default_rng(seed + run)

    # take words from that run's CON+ABS lists
    words = []
    for f in [f"con_run{run}.csv", f"abs_run{run}.csv"]:
        words += dfs[f]["word"].astype(str).tolist()

    rng_local.shuffle(words)
    phrase = " ".join(words[:n_words])

    tmp = AUDIO_DIR / f"_base_tmp_run{run}.wav"
    synth(phrase, tmp)

    x, sr = read_wav(tmp)
    if sr != SR_TARGET:
        import librosa
        x = librosa.resample(x, orig_sr=sr, target_sr=SR_TARGET)
        sr = SR_TARGET

    # slow down + silence BEFORE vocoding
    x = slow_down(x, rate=0.5)
    x = prepend_silence(x, sr, silence_s=0.25)

    y = noise_vocode(x, sr=sr, n_bands=n_bands, seed=seed + run)

    y = pad_or_trim(y, TARGET_SAMPLES)

    out_path = AUDIO_DIR / out_name
    write_wav(out_path, y, sr)

    try:
        tmp.unlink()
    except Exception:
        pass

    return out_path, phrase


base1_path, base1_phrase = make_base(1, "base_run1.wav", n_words=6, n_bands=6)
base2_path, base2_phrase = make_base(2, "base_run2.wav", n_words=6, n_bands=6)

base1_path, base2_path, base1_phrase


(WindowsPath('c:/Users/kinga/Documents/Blindbrain/4. Courses/fMRI - design of the experiment and data analysis/cognes-auditory-1back-pilot/experiment/resources/audio/base_run1.wav'),
 WindowsPath('c:/Users/kinga/Documents/Blindbrain/4. Courses/fMRI - design of the experiment and data analysis/cognes-auditory-1back-pilot/experiment/resources/audio/base_run2.wav'),
 'przyjaźń tragedia jaszczurka lustro idiota zagadka')

In [15]:
# Quick audit
def list_audio(prefix: str):
    return sorted([p.name for p in AUDIO_DIR.glob(prefix)])

len(list_audio("con_run1_*.wav")), len(list_audio("abs_run1_*.wav")), (AUDIO_DIR/"base_run1.wav").exists()


(24, 24, True)