In [None]:
# @title Environment Setup
import os, re, subprocess, sys

def run(cmd):
    """Echo an argv-style command behind a '+' marker, then execute it.

    Args:
        cmd: Sequence of strings forming the command line; it is passed to
            ``subprocess.run`` as a list, so no shell is involved.

    Raises:
        subprocess.CalledProcessError: if the command exits with a non-zero status.
    """
    rendered = ' '.join(cmd)
    print('+', rendered)
    # check=True converts a non-zero exit status into an exception.
    subprocess.run(cmd, check=True)

# System audio tools are installed on a best-effort basis: if apt-get is
# unavailable or any step fails, the problem is reported and setup continues.
apt_steps = (
    ['apt-get', 'update'],
    ['apt-get', 'install', '-y', 'ffmpeg', 'sox', 'libsndfile1'],
)
try:
    for step in apt_steps:
        run(step)
except Exception as e:
    print('Skipping apt-get install:', e)


# --- GPU detection ---
# nvidia-smi may not exist at all on CPU-only runtimes; a missing binary means
# "no GPU" and must not abort the setup cell.
try:
    gpu = subprocess.run(['nvidia-smi'], capture_output=True, text=True)
except OSError as e:
    print('nvidia-smi unavailable, assuming no GPU:', e)
    gpu = None
has_gpu = gpu is not None and gpu.returncode == 0

# --- PyTorch wheel index selection ---
torch_pkgs = ['torch==2.3.0', 'torchvision==0.18.0', 'torchaudio==2.3.0']
cpu_index = "https://download.pytorch.org/whl/cpu"
# CUDA builds published for torch 2.3.0, newest first. The driver's reported
# CUDA version is only an upper bound, so it is mapped onto one of these builds
# instead of being turned directly into a (possibly non-existent) index URL.
torch_cuda_builds = [(12, 1), (11, 8)]

# An explicit PYTORCH_INDEX_URL always wins over auto-detection.
index = os.environ.get('PYTORCH_INDEX_URL')
if not index and has_gpu:
    m = re.search(r'CUDA Version: (\d+)\.(\d+)', gpu.stdout or gpu.stderr)
    if m:
        driver_cuda = (int(m.group(1)), int(m.group(2)))
        for build in torch_cuda_builds:
            # Pick the newest build the driver is able to run.
            if driver_cuda >= build:
                index = f"https://download.pytorch.org/whl/cu{build[0]}{build[1]}"
                break
if not index:
    index = cpu_index
print('Using PyTorch index:', index)
try:
    run([sys.executable, '-m', 'pip', 'install', f'--index-url={index}', *torch_pkgs])
except subprocess.CalledProcessError:
    # Retrying the CPU index is pointless when it is the one that just failed.
    if index == cpu_index:
        raise
    fallback = cpu_index
    print('PyTorch install failed, falling back to', fallback)
    run([sys.executable, '-m', 'pip', 'install', f'--index-url={fallback}', *torch_pkgs])

# --- Project requirements ---
req = 'requirements-colab-gpu.txt' if has_gpu else 'requirements-colab-cpu.txt'
run([sys.executable, '-m', 'pip', 'install', '-r', req])

# --- Sanity report ---
# Informational only: the apt-get step is allowed to be skipped, so ffmpeg may
# legitimately be missing here.
try:
    subprocess.run(['ffmpeg', '-version'])
except OSError as e:
    print('ffmpeg not found on PATH:', e)
import torch, torchvision, torchaudio, soxr
print('torch', torch.__version__)
print('torchvision', torchvision.__version__)
print('torchaudio', torchaudio.__version__)
print('CUDA available:', torch.cuda.is_available())
print('Resampler backend:', 'soxr', getattr(soxr, '__version__', 'unknown'))


In [None]:
# @title Mount Google Drive
# Mount Google Drive to access your own models.
# google.colab only exists inside Colab; elsewhere skip the mount instead of
# failing the whole run, in line with the best-effort apt-get step.
try:
    from google.colab import drive
except ImportError as e:
    print('Skipping Google Drive mount (google.colab not available):', e)
else:
    drive.mount('/content/drive')

In [None]:
# @title RVC Model Setup
import os

# --- Clarification ---
# The RVC (Retrieval-based Voice Conversion) functionality in this project
# is currently a placeholder. The `scripts/colab_pipeline.py` script includes
# the logic for vocal conversion, but it uses an "identity" model that does
# not actually change the voice. To use a real RVC model, you would need
# to modify the script.

# --- Model Download ---
# This project expects RVC models to be in the `/content/models/RVC` directory.
# You can upload your own models (.pth files) to this directory.
# By default, the scripts look for a model named `G_8200.pth`.

# Create the model directory up front (a no-op if it already exists) so that
# uploads and copies into it have a destination.
rvc_model_dir = "/content/models/RVC"
os.makedirs(rvc_model_dir, exist_ok=True)

print(f"RVC models should be placed in: {rvc_model_dir}")

# --- Example Download (Placeholder) ---
# Below is an example of how you could download a model.
# !!! PLEASE REPLACE THE URL WITH A DIRECT DOWNLOAD LINK TO YOUR OWN RVC MODEL !!!
# !wget -O {os.path.join(rvc_model_dir, 'G_8200.pth')} "YOUR_MODEL_URL_HERE"
# The copy command is shown as text for the user to adapt; it is not executed.
print(
    "\nTo use your own model from Google Drive, you can copy it like this:",
    f"!cp /content/drive/MyDrive/path/to/your/model.pth {rvc_model_dir}/",
    sep="\n",
)

In [None]:
import math, wave, array, subprocess, os, json, torch
from pathlib import Path

def _write_tone(path, freq, duration=5, sr=48000):
    t = [math.sin(2*math.pi*freq*i/sr) for i in range(int(duration*sr))]
    ints = array.array('h', [int(max(-1.0,min(1.0,x))*32767) for x in t])
    path.parent.mkdir(parents=True, exist_ok=True)
    with wave.open(str(path), 'wb') as wf:
        wf.setnchannels(1); wf.setsampwidth(2); wf.setframerate(sr); wf.writeframes(ints.tobytes())

def make_demo(directory):
    """Fill ``directory`` with one sine-tone WAV per demo stem.

    Writes vocals.wav (440 Hz), drums.wav (220 Hz), bass.wav (110 Hz) and
    other.wav (330 Hz), each generated by ``_write_tone`` with its default
    duration and sample rate.

    Args:
        directory: Path object under which the stem files are written.
    """
    stem_tones = (('vocals', 440), ('drums', 220), ('bass', 110), ('other', 330))
    for stem, hz in stem_tones:
        _write_tone(directory / f'{stem}.wav', hz)

# Fixed seed, forwarded to the pipeline through --seed by run() below.
seed = 0
# Directory for the synthetic input stems; make_demo writes the WAV files into it.
inp = Path('demo_stems')
make_demo(inp)

def run(device):
    """Run scripts/pipeline.py on the demo stems and return its JSON report.

    NOTE(review): this shares its name with the setup cell's ``run(cmd)``
    helper and replaces it in the kernel namespace once this cell has executed.

    Args:
        device: Label used for the output directory (``demo_output_<device>``).
            The value 'cpu' additionally hides every CUDA device from the child
            process; any other value leaves the inherited environment untouched.

    Returns:
        The parsed contents of ``report.json`` from the output directory.

    Raises:
        subprocess.CalledProcessError: if the pipeline exits with a non-zero status.
    """
    # Local import: this cell's own import line does not bring in ``sys``, so
    # the function must not rely on another cell having imported it.
    import sys

    out = Path(f'demo_output_{device}')
    env = os.environ.copy()
    if device == 'cpu':
        # '-1' matches no device index, so the child process sees no GPUs.
        env['CUDA_VISIBLE_DEVICES'] = '-1'
    subprocess.run([sys.executable, 'scripts/pipeline.py', '--input', str(inp), '--output', str(out), '--seed', str(seed)], check=True, env=env)
    # Decode explicitly as UTF-8 rather than with the platform default encoding.
    with open(out / 'report.json', encoding='utf-8') as f:
        return json.load(f)

# The CPU pass always runs; the GPU pass and the deltas are added only when
# CUDA is usable. All pipeline runs complete before any result is printed.
compare_gpu = torch.cuda.is_available()
cpu = run('cpu')
if compare_gpu:
    gpu = run('gpu')

print('CPU mix_lufs', cpu['mix_lufs'], 'TP', cpu['true_peak_db'])
if compare_gpu:
    print('GPU mix_lufs', gpu['mix_lufs'], 'TP', gpu['true_peak_db'])
    lufs_delta = abs(cpu['mix_lufs'] - gpu['mix_lufs'])
    print('LUFS diff', lufs_delta)
    tp_delta = abs(cpu['true_peak_db'] - gpu['true_peak_db'])
    print('TP diff', tp_delta)


In [None]:
# @title CPU/GPU Smoke Test
import subprocess, sys
from pathlib import Path
import ipywidgets as widgets
import torch
import wave, math, os
from mix import _save

def _metrics(path):
    with wave.open(str(path), 'rb') as wf:
        sr = wf.getframerate()
        frames = wf.readframes(wf.getnframes())
    ints = [int.from_bytes(frames[i:i+3], byteorder='little', signed=True)
            for i in range(0, len(frames), 3)]
    floats = [s / (2 ** 23) for s in ints]
    rms = math.sqrt(sum(x*x for x in floats) / len(floats)) if floats else 0.0
    lufs = 20 * math.log10(rms) if rms > 0 else float('-inf')
    up = []
    for i in range(len(floats)-1):
        a, b = floats[i], floats[i+1]
        up.extend(a + (b-a)*k/4 for k in range(4))
    up.append(floats[-1]) if floats else None
    peak = max((abs(x) for x in up), default=0.0)
    tp = 20 * math.log10(peak) if peak > 0 else float('-inf')
    return lufs, tp, len(floats)

def _run(_):
    """Button callback: run the pipeline once and compare a device round-trip copy.

    Steps:
      1. Regenerate the demo stems and run scripts/pipeline.py into ``demo_out_cpu``.
      2. Load ``demo_out_cpu/mix.wav`` into a tensor, move it to CUDA and back
         when CUDA is available, and store it as ``demo_out_gpu/mix.wav``.
      3. Print ``_metrics`` for both files together with their differences.

    Args:
        _: Argument supplied by the click event; unused.

    Raises:
        subprocess.CalledProcessError: if the pipeline exits with a non-zero status.
    """
    inp = Path('demo_stems')
    make_demo(inp)
    cpu_out = Path('demo_out_cpu')
    gpu_out = Path('demo_out_gpu')
    subprocess.run([sys.executable, 'scripts/pipeline.py', '--input', str(inp), '--output', str(cpu_out), '--seed', '0'], check=True)
    with wave.open(str(cpu_out / 'mix.wav'), 'rb') as wf:
        # Keep the file's real rate so the copy is not relabelled as 48 kHz.
        sr = wf.getframerate()
        frames = wf.readframes(wf.getnframes())
    # NOTE(review): assumes 24-bit PCM (3 bytes per sample), the same layout
    # _metrics reads back — confirm against the pipeline's writer.
    data = [int.from_bytes(frames[i:i+3], 'little', signed=True)/(2**23) for i in range(0,len(frames),3)]
    tensor = torch.tensor(data)
    if torch.cuda.is_available():
        # A host -> device -> host round trip is the only GPU work in this test.
        tensor = tensor.to('cuda').to('cpu')
    # Whether _save creates missing directories is not visible from here, so
    # make sure the target directory exists first.
    gpu_out.mkdir(parents=True, exist_ok=True)
    _save(gpu_out / 'mix.wav', tensor.tolist(), sr)
    cpu = _metrics(cpu_out / 'mix.wav')
    gpu = _metrics(gpu_out / 'mix.wav')
    print('CPU', cpu)
    print('GPU', gpu)
    print('ΔLUFS', abs(cpu[0]-gpu[0]), 'ΔTP', abs(cpu[1]-gpu[1]), 'Δsamples', cpu[2]-gpu[2])

# Expose the smoke test as a button so it runs only on demand; _run is
# registered as the click handler and the button is rendered in the cell output.
button = widgets.Button(description='Run CPU/GPU smoke test')
button.on_click(_run)
display(button)
