In [None]:
# %% [code]
# 1) Environment check & installs
!nvidia-smi
!pip install --upgrade pip
# Install PyTorch (CUDA 11.8 wheel) -- adjust if Colab CUDA changes
!pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu118
!pip install openvoice-cli gradio soundfile librosa --quiet
!apt-get install -y git-lfs
!git lfs install
print('✅ Installs complete')

/bin/bash: line 1: nvidia-smi: command not found
Looking in indexes: https://download.pytorch.org/whl/cu118
Reading package lists... Done
Building dependency tree... Done
Reading state information... Done
git-lfs is already the newest version (3.0.2-1ubuntu0.3).
0 upgraded, 0 newly installed, 0 to remove and 35 not upgraded.
Git LFS initialized.
✅ Installs complete


In [None]:
# 2) Download OpenVoice checkpoints (Hugging Face)
# This may take time depending on network. It clones a repo into ./checkpoints
!rm -rf checkpoints # Remove existing checkpoints directory
!git clone https://huggingface.co/myshell-ai/OpenVoiceV2 checkpoints || true
!ls -la checkpoints || true
print('✅ Checkpoints folder ready')

Cloning into 'checkpoints'...
remote: Enumerating objects: 35, done.[K
remote: Counting objects: 100% (6/6), done.[K
remote: Compressing objects: 100% (6/6), done.[K
remote: Total 35 (delta 2), reused 0 (delta 0), pack-reused 29 (from 1)[K
Unpacking objects: 100% (35/35), 12.52 KiB | 512.00 KiB/s, done.
total 40
drwxr-xr-x 5 root root 4096 Aug 30 08:36 .
drwxr-xr-x 1 root root 4096 Aug 30 08:36 ..
drwxr-xr-x 3 root root 4096 Aug 30 08:36 base_speakers
drwxr-xr-x 2 root root 4096 Aug 30 08:36 converter
-rw-r--r-- 1 root root 6148 Aug 30 08:36 .DS_Store
drwxr-xr-x 9 root root 4096 Aug 30 08:36 .git
-rw-r--r-- 1 root root 1519 Aug 30 08:36 .gitattributes
-rw-r--r-- 1 root root 5371 Aug 30 08:36 README.md
✅ Checkpoints folder ready


In [1]:
# 3) Upload reference audio and optional input audio
from google.colab import files
import os, librosa, soundfile as sf, numpy as np
uploaded = files.upload('/content/Original-Audio')
filenames = list(uploaded.keys())
print('Uploaded files:', filenames)
if len(filenames)==0:
  raise SystemExit('Upload at least one audio file (reference).')
# Use first uploaded as reference; if second uploaded, use as input narration
ref_path = filenames[0]
input_path = filenames[1] if len(filenames)>1 else filenames[0]
print('Reference:', ref_path, 'Input:', input_path)

Saving 01 Escape from Madness.mp3 to /content/Original-Audio/01 Escape from Madness.mp3
Uploaded files: ['/content/Original-Audio/01 Escape from Madness.mp3']
Reference: /content/Original-Audio/01 Escape from Madness.mp3 Input: /content/Original-Audio/01 Escape from Madness.mp3


In [2]:
# 4) Trim/pad uploaded audio to exactly 30 seconds (30s = 30*sr samples)
import math
import os
TARGET_SEC = 30.0
def ensure_30s(infile, outfile, sr=16000):
    wav, _ = librosa.load(infile, sr=sr, mono=True)
    target_len = int(TARGET_SEC*sr)
    if len(wav) > target_len:
        wav = wav[:target_len]
    elif len(wav) < target_len:
        # pad by repeating or silence
        pad_len = target_len - len(wav)
        wav = np.concatenate([wav, np.zeros(pad_len)])
    sf.write(outfile, wav, sr)
    return outfile

# Create the samples directory if it doesn't exist
os.makedirs('samples', exist_ok=True)

ref_30 = 'samples/original_30s.wav'
inp_30 = 'samples/input_30s.wav'

ensure_30s(ref_path, ref_30, sr=16000)
ensure_30s(input_path, inp_30, sr=16000)

print('✅ Created 30s trimmed/padded files:', ref_30, inp_30)

✅ Created 30s trimmed/padded files: samples/original_30s.wav samples/input_30s.wav


In [3]:
# 5) Run OpenVoice CLI conversion (single) to convert input -> cloned voice using reference
import shlex, subprocess, sys, os
ref = 'samples/original_30s.wav'
inp = 'samples/input_30s.wav'
out = 'samples/cloned_30s.wav'
cmd = f"python -m openvoice_cli.single -i {inp} -r {ref} -o {out} -d cpu"
print('Running:', cmd)
try:
    import subprocess as sp
    gpu_check = sp.run(['nvidia-smi'], stdout=sp.PIPE, stderr=sp.PIPE)
    if gpu_check.returncode==0:
        cmd = f"python -m openvoice_cli.single -i {inp} -r {ref} -o {out} -d cuda:0"
    res = sp.run(shlex.split(cmd), stdout=sp.PIPE, stderr=sp.PIPE, text=True)
    print('STDOUT:\n', res.stdout)
    print('STDERR:\n', res.stderr)
except Exception as e:
    print('Error running OpenVoice CLI:', e)
print('✅ Conversion command finished — check samples/cloned_30s.wav')

Running: python -m openvoice_cli.single -i samples/input_30s.wav -r samples/original_30s.wav -o samples/cloned_30s.wav -d cpu
Error running OpenVoice CLI: [Errno 2] No such file or directory: 'nvidia-smi'
✅ Conversion command finished — check samples/cloned_30s.wav


In [4]:
# 6) Play back original vs cloned
from IPython.display import Audio, display
import os
import librosa
import ipywidgets as widgets

original_audio_dir = "/content/Original-Audio"

# List only .wav or .mp3 files
audio_files = [f for f in os.listdir(original_audio_dir) if f.endswith((".wav", ".mp3"))]


print(f"Available audio files in directory ({len(audio_files)}):")
for f in audio_files:
    print("-", f)  # show names only

if len(audio_files) == 0:
    raise ValueError("No audio files found in directory!")

for f in audio_files:
    path = os.path.join(original_audio_dir, f)
    audio, sr = librosa.load(path, sr=None)
    display(Audio(audio, rate=sr))
    print(f"Loaded {f}: {audio.shape}, sample rate={sr}")

# Function to play selected audio
def play_audio(change):
    file_path = os.path.join(original_audio_dir, change['new'])
    print(f"▶️ Playing: {change['new']}")
    display(Audio(file_path, rate=16000))

print('Original (trimmed to 30s):')
display(Audio('samples/original_30s.wav', rate=16000))
print('Cloned (30s):')

Available audio files in directory (1):
- 01 Escape from Madness.mp3


Loaded 01 Escape from Madness.mp3: (1687560,), sample rate=44100
Original (trimmed to 30s):


Cloned (30s):
