# pvx Tutorial Notebook\n\nThis notebook demonstrates:\n- loading sample audio\n- time stretching\n- pitch shifting\n- spectrogram plotting\n- artifact comparison for phase locking modes

In [None]:
# Dependencies for the whole notebook: path handling, numerics, audio I/O, plotting.
import sys
from pathlib import Path
import numpy as np
import soundfile as sf
import matplotlib.pyplot as plt

# ROOT is the kernel's current working directory; every path below is built from it.
# NOTE(review): presumably the notebook is meant to be launched from the repository root,
# so that ROOT / 'src' contains the `pvx` package — confirm.
ROOT = Path.cwd()
SRC = ROOT / 'src'
# Prepend SRC to sys.path once, so re-running this cell does not add duplicates.
if str(SRC) not in sys.path:
    sys.path.insert(0, str(SRC))

# Must come after the sys.path tweak above.
from pvx.core.voc import VocoderConfig, phase_vocoder_time_stretch, resample_1d

In [None]:
# Input signal: use the sample file if it is present, otherwise synthesise a tone.
# Later cells read the mono signal `x` and its sample rate `sr`.
candidate = ROOT / 'examples' / 'speech_seed_1s.wav'
if not candidate.exists():
    # Fallback: sum of a 180 Hz and a 360 Hz sine, sr * 1.0 samples long.
    sr = 24000
    t = np.arange(int(sr * 1.0)) / sr
    low_partial = 0.35 * np.sin(2 * np.pi * 180 * t)
    high_partial = 0.18 * np.sin(2 * np.pi * 360 * t)
    x = low_partial + high_partial
else:
    # always_2d=True gives a 2-D array; averaging over axis 1 yields a 1-D signal.
    frames, sr = sf.read(candidate, always_2d=True)
    x = np.mean(frames, axis=1)

print('samples:', x.size, 'sr:', sr)

In [None]:
def stft_db(sig, n_fft=2048, hop=256):
    """Return a log-magnitude spectrogram of a 1-D signal.

    The signal is cut into frames of ``n_fft`` samples taken every ``hop``
    samples, each frame is multiplied by a Hann window (``np.hanning``), and
    the magnitude of its real FFT is converted to decibels.

    Parameters
    ----------
    sig : array-like
        One-dimensional signal.
    n_fft : int
        Frame length in samples.
    hop : int
        Step between successive frame starts, in samples.

    Returns
    -------
    numpy.ndarray
        Array of shape ``(n_fft // 2 + 1, n_frames)``. If the signal is
        shorter than one frame, a single all-zero column is returned so that
        callers always receive a 2-D array.
    """
    sig = np.asarray(sig)
    w = np.hanning(n_fft)
    cols = []
    # The stop value is the last valid start index plus one, so a frame that
    # ends exactly on the final sample is included rather than dropped.
    for i in range(0, sig.size - n_fft + 1, hop):
        frame = sig[i:i + n_fft]
        mag = np.abs(np.fft.rfft(frame * w))
        # The 1e-9 floor keeps log10 finite for silent bins.
        cols.append(20 * np.log10(mag + 1e-9))
    if not cols:
        return np.zeros((n_fft // 2 + 1, 1))
    return np.array(cols).T


def show_spec(sig, sr, title):
    """Draw the spectrogram of ``sig`` in a new matplotlib figure.

    Parameters
    ----------
    sig : array-like
        One-dimensional signal, passed to ``stft_db`` with its default settings.
    sr : int
        Sample rate. Accepted for call-site symmetry but currently unused:
        the axes are labelled in frame and bin indices, not seconds and hertz.
    title : str
        Figure title.
    """
    S = stft_db(sig)
    fig, ax = plt.subplots(figsize=(10, 3.8))
    # origin='lower' puts bin 0 at the bottom of the image.
    image = ax.imshow(S, origin='lower', aspect='auto', cmap='magma')
    ax.set_title(title)
    ax.set_xlabel('Frame')
    ax.set_ylabel('Bin')
    fig.colorbar(image, ax=ax, label='dB')
    fig.tight_layout()

In [None]:
# Vocoder settings for the time-stretch and pitch-shift demos.
cfg = VocoderConfig(
    n_fft=2048,
    win_length=2048,
    hop_size=512,
    window='hann',
    center=True,
    phase_locking='identity',
    transient_preserve=True,
    transient_threshold=2.0,
)

# NOTE(review): the second argument is presumably the stretch factor (1.5x) — confirm
# against phase_vocoder_time_stretch's documentation.
y_stretch = phase_vocoder_time_stretch(x, 1.5, cfg)

# The input may have been synthesised because the sample file was missing, so the
# output directory is not guaranteed to exist; create it before writing.
out_dir = ROOT / 'examples'
out_dir.mkdir(parents=True, exist_ok=True)
sf.write(out_dir / 'tutorial_stretch.wav', y_stretch, sr)

show_spec(y_stretch, sr, 'Time-stretched (1.5x)')

In [None]:
# Pitch shift = time stretch by `ratio`, then resample by the same factor.
semitones = 3.0
# Frequency ratio for a shift of `semitones` equal-tempered semitones.
ratio = 2 ** (semitones / 12.0)

z = phase_vocoder_time_stretch(x, ratio, cfg)
# Resample the stretched signal to z.size / ratio samples.
# NOTE(review): presumably resample_1d's second argument is the target length — confirm.
y_pitch = resample_1d(z, int(round(z.size / ratio)), mode='linear')

# The output directory is not guaranteed to exist; create it before writing.
out_dir = ROOT / 'examples'
out_dir.mkdir(parents=True, exist_ok=True)
sf.write(out_dir / 'tutorial_pitch_up3.wav', y_pitch, sr)

show_spec(y_pitch, sr, 'Pitch up +3 semitones (duration preserved)')

In [None]:
# Settings shared by both runs; only `phase_locking` differs between them, so the
# comparison isolates that single option.
shared_settings = dict(
    n_fft=2048,
    win_length=2048,
    hop_size=512,
    window='hann',
    center=True,
    transient_preserve=True,
    transient_threshold=2.0,
)
cfg_free = VocoderConfig(phase_locking='off', **shared_settings)
cfg_lock = VocoderConfig(phase_locking='identity', **shared_settings)

y_free = phase_vocoder_time_stretch(x, 1.25, cfg_free)
y_lock = phase_vocoder_time_stretch(x, 1.25, cfg_lock)

# The output directory is not guaranteed to exist; create it before writing.
out_dir = ROOT / 'examples'
out_dir.mkdir(parents=True, exist_ok=True)
sf.write(out_dir / 'tutorial_free_phase.wav', y_free, sr)
sf.write(out_dir / 'tutorial_locked_phase.wav', y_lock, sr)


def rms(v):
    """Return the root-mean-square of array ``v`` as a Python float.

    A 1e-12 offset is added to the mean square before the square root.
    """
    return float(np.sqrt(np.mean(v * v) + 1e-12))


print('free RMS:', rms(y_free))
print('lock RMS:', rms(y_lock))

show_spec(y_free, sr, 'Free phase (phase_locking=off)')
show_spec(y_lock, sr, 'Identity phase locking')

## What to compare by ear

- `tutorial_free_phase.wav` vs `tutorial_locked_phase.wav`: listen for transient sharpness and any reduction of phasy blur.
- `tutorial_stretch.wav`: check continuity and whether attacks smear.
- `tutorial_pitch_up3.wav`: check that the pitch is correct and note any shift in timbre.