In [None]:
# Quick kNN-SVC demo via the CLI entrypoint (ddsp_inference.py)
# 1) Set your source and reference (style) 16kHz mono wav paths
# 2) Configure options (ckpt_type/post_opt/topk/etc.)
# 3) Run the next cell to perform conversion and save output

import os

# REQUIRED: update these to real files you have locally
src_wav_path = "sample_content/Danakil-voice_resampled_16000_cut.wav"          # content/source wav (16kHz, mono)
ref_wav_path = "sample_content/Tiken_lead_07_resampled_16000_cut.wav"        # style/target wav (16kHz, mono)

# Fail fast when an input is missing. The previous bare `exit` was never
# called, so it did nothing and the cell carried on with a bad path.
# Raising stops this cell (and a "Run All") with an explicit error instead.
missing_wavs = [path for path in (src_wav_path, ref_wav_path) if not os.path.isfile(path)]
if missing_wavs:
    raise FileNotFoundError("input wav file(s) do not exist: " + ", ".join(missing_wavs))

# Options (defaults mirror README examples)
# NOTE: ckpt_dir is an absolute, machine-specific path -- point it at your own checkpoint folder.
ckpt_dir = "/home/ken/Downloads/knn_vc_data/ckpt_saved/"
ckpt_type = "mix"            # e.g., mix, wavlm_only, wavlm_only_original, mix_harm_no_amp_*, mix_no_harm_no_amp_*
post_opt  = "post_opt_0.2"   # or "no_post_opt"
topk      = 4
device    = "cuda"            # or "cpu"
prioritize_f0 = True
tgt_loudness_db = -16

# Helper: expected output filename produced by special_match.
# Built as <src dir>/<src stem>_to_<ref stem>_knn_<ckpt_type>_<post_opt>.wav;
# later cells use this path to check for and load the converted audio.
src_base = os.path.splitext(os.path.basename(src_wav_path))[0]
ref_base = os.path.splitext(os.path.basename(ref_wav_path))[0]
expected_out_wav = os.path.join(os.path.dirname(src_wav_path), f"{src_base}_to_{ref_base}_knn_{ckpt_type}_{post_opt}.wav")
print("Expected output:", expected_out_wav)

Expected output: sample_content/Danakil-voice_resampled_16000_cut_to_Tiken_lead_07_resampled_16000_cut_knn_mix_post_opt_0.2.wav


In [17]:
# Invoke ddsp_inference.py as a subprocess (it uses ddsp_hubconf.knn_vc under the hood).
# The converted file is expected next to the source, at the expected_out_wav path.

import os, shlex, subprocess, sys

# Flag/value pairs in the order they are passed on the command line.
# Non-string settings are stringified here; the boolean is lower-cased.
cli_options = {
    "--ckpt_dir": ckpt_dir,
    "--ckpt_type": ckpt_type,
    "--post_opt": post_opt,
    "--topk": str(topk),
    "--device": device,
    "--prioritize_f0": str(prioritize_f0).lower(),
    "--tgt_loudness_db": str(tgt_loudness_db),
}

# Same interpreter as the kernel, then the script, the two positional wavs, then the flags.
args = [sys.executable, "ddsp_inference.py", src_wav_path, ref_wav_path]
for flag, value in cli_options.items():
    args.extend((flag, value))

print("Running:", shlex.join(args))
completed = subprocess.run(args, capture_output=True, text=True)
print("Return code:", completed.returncode)

# Echo whichever captured streams are non-empty, stdout first.
for banner, captured in (
    ("--- stdout ---\n", completed.stdout),
    ("--- stderr ---\n", completed.stderr),
):
    if captured:
        print(banner, captured)

# A non-zero return code is intentionally not fatal here: special_match may call
# sys.exit internally. The presence of the output file is what gets reported.
print("Exists?", os.path.exists(expected_out_wav))

Running: /home/ken/open/knn-svc-master/.venv/bin/python ddsp_inference.py sample_content/Danakil-voice_resampled_16000_cut.wav sample_content/Tiken_lead_07_resampled_16000_cut.wav --ckpt_dir /home/ken/Downloads/knn_vc_data/ --ckpt_type mix --post_opt post_opt_0.2 --topk 4 --device cuda --prioritize_f0 true --tgt_loudness_db -16
Return code: 1
--- stdout ---
 weights [0. 0. 0. 0. 0. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 0.]

--- stderr ---
0.00s - make the debugger miss breakpoints. Please pass -Xfrozen_modules=off
0.00s - to python to disable frozen modules.
0.00s - Note: Debugging will proceed. Set PYDEVD_DISABLE_FILE_VALIDATION=1 to disable this validation.
  WeightNorm.apply(module, name, dim)
Traceback (most recent call last):
  File "/home/ken/open/knn-svc-master/.venv/lib/python3.11/site-packages/debugpy/_vendored/pydevd/pydevd.py", line 3729, in <module>
    main()
  File "/home/ken/open/knn-svc-master/.venv/lib/python3.11/site-packages/debugpy/_vendored/pyde

In [9]:
# Read the converted file back in so the cells below can play and inspect it
import torch
import torchaudio

TARGET_SR = 16000  # sample rate the rest of the notebook works at

wav, sr = torchaudio.load(expected_out_wav)

# Keep only the first channel when the file has more than one
if wav.ndim == 2 and wav.shape[0] > 1:
    wav = wav[:1]

# Resample only when the file is not already at the target rate
if sr != TARGET_SR:
    wav = torchaudio.functional.resample(wav, sr, TARGET_SR)
    sr = TARGET_SR

# Drop the leading dimension (when it has size 1) to get the waveform used below
out_wav = wav.squeeze(dim=0)
print("Loaded:", expected_out_wav, "shape:", tuple(out_wav.shape), "sr:", sr)

Loaded: sample_content/Danakil-voice_resampled_16000_cut_to_Tiken_lead_07_resampled_16000_cut_knn_mix_post_opt_0.2.wav shape: (960320,) sr: 16000


In [10]:
import IPython.display as ipd

In [11]:
# Render an inline audio player for the converted waveform (last expression => cell output)
ipd.Audio(data=out_wav.numpy(), rate=sr)

In [12]:
# Summary numbers for the loaded waveform: length in seconds and largest absolute sample
duration_s = round(out_wav.numel() / sr, 3)
print("Duration (s):", duration_s)
peak_amplitude = float(out_wav.abs().max())
print("Peak amplitude:", peak_amplitude)

Duration (s): 60.02
Peak amplitude: 0.687566339969635


In [14]:
# Optional: save a copy alongside the notebook (uncomment the next line to do so).
# unsqueeze(0) puts a leading dimension back on out_wav before it is handed to torchaudio.save.
# torchaudio.save('knnvc_demo_out.wav', out_wav.unsqueeze(0), sr)
# Report the path the earlier cells expect the converted file to be at.
print("Output is already saved here:", expected_out_wav)

Output is already saved here: sample_content/Danakil-voice_resampled_16000_cut_to_Tiken_lead_07_resampled_16000_cut_knn_mix_post_opt_0.2.wav


# kNN-SVC notebook demo

Use this notebook to run a minimal conversion using the CLI entrypoint `ddsp_inference.py`.

Instructions:
- Put 16kHz, mono WAV files on disk for the source (content) and target (style) speakers.
- Update the first cell paths (`src_wav_path`, `ref_wav_path`) and point `ckpt_dir` at your local checkpoint directory.
- Run the second cell to generate the converted audio (it will be saved next to the source file).
- Run subsequent cells to load and play the result in the notebook.

In [None]:
# (Optional) Cleanup: uncomment the lines below to delete the generated output file
# import os
# if os.path.exists(expected_out_wav):
#     os.remove(expected_out_wav)
#     print("Deleted:", expected_out_wav)