In [18]:

def running_in_colab() -> bool:
    """Report whether this code appears to be executing on Google Colab.

    Three signals are consulted, in order:
      1. the ``google.colab`` package can be imported,
      2. the ``COLAB_RELEASE_TAG`` environment variable is set to a non-empty value,
      3. the string form of the active IPython shell mentions ``google.colab``.

    Returns:
        True if any signal matches; False otherwise, including when
        ``get_ipython`` is not defined (i.e. not running under IPython).
    """
    import os

    # Signal 1: any failure while importing is treated as "package not available".
    try:
        import google.colab  # type: ignore
    except Exception:
        colab_importable = False
    else:
        colab_importable = True

    # Signal 2 is only decisive when signal 1 did not already match.
    if colab_importable or os.environ.get("COLAB_RELEASE_TAG"):
        return True

    # Signal 3: get_ipython is injected by IPython; outside IPython the name is undefined.
    try:
        shell_description = str(get_ipython())
    except NameError:
        return False
    return "google.colab" in shell_description

# Detect the environment once; later cells branch on IN_COLAB (e.g. to pick the checkpoint directory).
IN_COLAB = running_in_colab()
print(f"Are we in Colab? {IN_COLAB}")

# Colab-only bootstrap: fetch the code, install its dependencies and download the checkpoints.
# NOTE(review): this looks intended to run once per runtime; re-running repeats the clone and the %cd.
if IN_COLAB:
    # Please remember to change your runtime into a GPU runtime before running the notebook
    # Clone the project and make it the working directory, so the relative paths used by
    # later cells (ddsp_inference.py, sample_content/, checkpoints/) resolve inside the repo.
    ! git clone https://github.com/SmoothKen/knn-svc
    %cd knn-svc


    # Install the project's main dependency group with poetry, without creating a virtualenv.
    ! pip -q install poetry
    ! poetry config virtualenvs.create false        # install into the current (Colab) env
    ! poetry install --only main --no-interaction --no-ansi --no-root

    # Download the two released generator checkpoints into checkpoints/ under shorter local names.
    ! mkdir -p checkpoints
    ! wget -O checkpoints/g_00810000_opensinger.pt "https://github.com/SmoothKen/knn-svc/releases/download/abracadabra/g_00810000_opensinger_mix_harm_no_amp_0.552.pt"
    ! wget -O checkpoints/g_00898000_mix.pt "https://github.com/SmoothKen/knn-svc/releases/download/abracadabra/g_00898000_mix_harm_no_amp_0.633333.pt"



Are we in Colab? False


In [None]:
# Quick kNN-SVC demo via the CLI entrypoint (ddsp_inference.py)
# 1) Set your source and reference (style) 16kHz mono wav paths
# 2) Configure options (ckpt_type/post_opt/topk/etc.)
# 3) Run the next cell to perform conversion and save output

import os

# REQUIRED: update these to real files you have locally
src_wav_path = "sample_content/Danakil-voice_resampled_16000_cut.wav"          # content/source wav (16kHz, mono)
ref_wav_path = "sample_content/Tiken_lead_07_resampled_16000_cut.wav"        # style/target wav (16kHz, mono)

# Fail fast when an input is missing. A bare `exit` expression is never called and
# therefore does nothing, so raise instead: this stops the cell (and "Run All")
# before the conversion is attempted with a bad path.
for _wav_path in (src_wav_path, ref_wav_path):
    if not os.path.isfile(_wav_path):
        raise FileNotFoundError(f"file {_wav_path} does not exist")

# Options (defaults mirror README examples)
if IN_COLAB:
    # Relative to the cloned repository, where the setup cell downloads the checkpoints.
    ckpt_dir = "checkpoints/"
else:
    # Local checkpoint directory. Set KNN_SVC_CKPT_DIR to point somewhere else
    # without editing the notebook; the fallback is the original author's path.
    ckpt_dir = os.environ.get("KNN_SVC_CKPT_DIR", "/home/ken/Downloads/knn_vc_data/ckpt_saved/")

ckpt_type = "mix"            # e.g., mix, wavlm_only, wavlm_only_original, mix_harm_no_amp_*, mix_no_harm_no_amp_*
post_opt  = "post_opt_0.2"   # or "no_post_opt"
topk      = 4
device    = "cuda"            # or "cpu"
prioritize_f0 = True
tgt_loudness_db = -16

# Helper: expected output filename produced by special_match
# (<src>_to_<ref>_knn_<ckpt_type>_<post_opt>.wav, placed in the source file's directory)
src_base = os.path.splitext(os.path.basename(src_wav_path))[0]
ref_base = os.path.splitext(os.path.basename(ref_wav_path))[0]
expected_out_wav = os.path.join(os.path.dirname(src_wav_path), f"{src_base}_to_{ref_base}_knn_{ckpt_type}_{post_opt}.wav")
print("Expected output:", expected_out_wav)

Expected output: sample_content/Danakil-voice_resampled_16000_cut_to_Tiken_lead_07_resampled_16000_cut_knn_mix_post_opt_0.2.wav


In [5]:
# Invoke the ddsp_inference.py CLI in a child process (it uses ddsp_hubconf.knn_vc under the hood).
# The converted file is saved next to the source under the expected_out_wav name.

import os
import shlex
import subprocess
import sys

# Option flags paired with their (string) values, in command-line order.
_cli_options = (
    ("--ckpt_dir", ckpt_dir),
    ("--ckpt_type", ckpt_type),
    ("--post_opt", post_opt),
    ("--topk", str(topk)),
    ("--device", device),
    ("--prioritize_f0", str(prioritize_f0).lower()),
    ("--tgt_loudness_db", str(tgt_loudness_db)),
)

# Current interpreter, the script, the two positional wav paths, then every flag followed by its value.
args = [sys.executable, "ddsp_inference.py", src_wav_path, ref_wav_path]
args += [token for option in _cli_options for token in option]

print("Running:", " ".join(map(shlex.quote, args)))
completed = subprocess.run(args, capture_output=True, text=True)
print("Return code:", completed.returncode)

# Echo whichever captured streams are non-empty.
for _banner, _captured in (
    ("--- stdout ---\n", completed.stdout),
    ("--- stderr ---\n", completed.stderr),
):
    if _captured:
        print(_banner, _captured)

# A non-zero return code is deliberately not treated as fatal in notebook contexts;
# special_match may call sys.exit internally. Check for the output file instead.
print("Exists?", os.path.exists(expected_out_wav))

Running: /home/ken/open/knn-svc-master/.venv/bin/python ddsp_inference.py sample_content/Danakil-voice_resampled_16000_cut.wav sample_content/Tiken_lead_07_resampled_16000_cut.wav --ckpt_dir /home/ken/Downloads/knn_vc_data/ckpt_saved/ --ckpt_type mix --post_opt post_opt_0.2 --topk 4 --device cuda --prioritize_f0 true --tgt_loudness_db -16
Return code: 0
--- stdout ---
 weights [0. 0. 0. 0. 0. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 0.]
Loading '/home/ken/Downloads/knn_vc_data/ckpt_saved/g_00898000_mix_harm_no_amp_0.633333 (1).pt'
Complete.
Loaded ckpt from local /home/ken/Downloads/knn_vc_data/ckpt_saved/g_00898000_mix_harm_no_amp_0.633333 (1).pt
[HiFiGAN] Generator loaded with 22,863,744 parameters.
Pretrained WavLM loaded
WavLM-Large loaded with 315,453,120 parameters.
cache dir removed for duration limit
[INFO] Processing a Single File
torch.Size([3001, 200]) torch.Size([3001, 1024])
INFO: using existing f0 file sample_content/Danakil-voice_resampled_16000_cut_f0.n

In [10]:
# Load the source, reference and converted audio for playback
import torchaudio, torch


def _load_for_playback(path, target_sr=16000, first_channel_only=False):
    """Load an audio file and return it at ``target_sr``.

    Args:
        path: audio file to read with ``torchaudio.load``.
        target_sr: sample rate the returned tensor should have.
        first_channel_only: when True, keep only the first channel of a
            multi-channel file (applied before resampling).

    Returns:
        The tensor returned by ``torchaudio.load``, optionally reduced to its
        first channel, resampled only if the file's rate differs from ``target_sr``.
    """
    audio, file_sr = torchaudio.load(path)
    if first_channel_only and audio.dim() == 2 and audio.size(0) > 1:
        audio = audio[:1]
    if file_sr != target_sr:
        audio = torchaudio.functional.resample(audio, file_sr, target_sr)
    return audio


# One shared rate for all three clips. Reusing a single `sr` variable across three
# separate loads meant the playback cells used the converted file's rate for the
# source and reference clips too, which is wrong whenever those are not 16 kHz.
sr = 16000

content_wav = _load_for_playback(src_wav_path, sr)
style_wav = _load_for_playback(ref_wav_path, sr)

# Ensure mono 16kHz for the converted output
wav = _load_for_playback(expected_out_wav, sr, first_channel_only=True)

# Drop the leading channel dimension so out_wav is a flat sample vector.
out_wav = wav.squeeze(0)
print("Loaded:", expected_out_wav, "shape:", tuple(out_wav.shape), "sr:", sr)

Loaded: sample_content/Danakil-voice_resampled_16000_cut_to_Tiken_lead_07_resampled_16000_cut_knn_mix_post_opt_0.2.wav shape: (960320,) sr: 16000


In [7]:
import IPython.display as ipd

In [12]:
# Inline player for the source (content) clip loaded from src_wav_path.
ipd.Audio(content_wav.numpy(), rate=sr)

In [13]:
# Inline player for the reference (style) clip loaded from ref_wav_path.
ipd.Audio(style_wav.numpy(), rate=sr)

In [None]:
# Inline player for the converted result loaded from expected_out_wav.
ipd.Audio(out_wav.numpy(), rate=sr)

In [14]:
# Summary numbers for the converted waveform: length in seconds and largest absolute sample.
out_num_samples = out_wav.numel()
out_peak = out_wav.abs().max()
print("Duration (s):", round(out_num_samples / sr, 3))
print("Peak amplitude:", float(out_peak))

Duration (s): 60.02
Peak amplitude: 0.7159312963485718


In [14]:
# Optional: uncomment the next line to write an extra copy alongside the notebook.
# unsqueeze(0) adds a leading channel dimension back onto the flat out_wav vector before saving.
# torchaudio.save('knnvc_demo_out.wav', out_wav.unsqueeze(0), sr)
# No extra save is needed otherwise: just report where the converted file is.
print("Output is already saved here:", expected_out_wav)

Output is already saved here: sample_content/Danakil-voice_resampled_16000_cut_to_Tiken_lead_07_resampled_16000_cut_knn_mix_post_opt_0.2.wav


# Notebook demo

Use this notebook to run a minimal conversion using the CLI entrypoint `ddsp_inference.py`.

Instructions:
- Put 16kHz, mono WAV files on disk for the source (content) and target (style) speakers.
- Update the paths (`src_wav_path`, `ref_wav_path`) in the configuration cell.
- Run the CLI cell (the one that calls `ddsp_inference.py`) to generate the converted audio (it will be saved next to the source file).
- Run subsequent cells to load and play the result in the notebook.

In [None]:
# (Optional) Cleanup helpers
# import os
# if os.path.exists(expected_out_wav):
#     os.remove(expected_out_wav)
#     print("Deleted:", expected_out_wav)