In [1]:
!git clone https://github.com/Plachtaa/seed-vc
%cd seed-vc
!pip install -r requirements.txt
!pip uninstall tensorflow -y

Cloning into 'seed-vc'...
remote: Enumerating objects: 907, done.[K
remote: Counting objects: 100% (237/237), done.[K
remote: Compressing objects: 100% (80/80), done.[K
remote: Total 907 (delta 184), reused 157 (delta 157), pack-reused 670 (from 2)[K
Receiving objects: 100% (907/907), 66.32 MiB | 41.31 MiB/s, done.
Resolving deltas: 100% (448/448), done.
/kaggle/working/seed-vc
Collecting torch (from -r requirements.txt (line 1))
  Downloading torch-2.4.0-cp311-cp311-manylinux1_x86_64.whl.metadata (26 kB)
Collecting torchvision (from -r requirements.txt (line 2))
  Downloading torchvision-0.19.0-cp311-cp311-manylinux1_x86_64.whl.metadata (6.0 kB)
Collecting torchaudio (from -r requirements.txt (line 3))
  Downloading torchaudio-2.4.0-cp311-cp311-manylinux1_x86_64.whl.metadata (6.4 kB)
Collecting scipy==1.13.1 (from -r requirements.txt (line 8))
  Downloading scipy-1.13.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (60 kB)
[2K     [90m━━━━━━━

In [2]:
# from pathlib import Path
# import subprocess

# BASE = Path('/kaggle/input/hindi-song')
# OUTPUT = Path('/kaggle/working/reconstructed_v2_HINDI')
# CONFIG = Path('/kaggle/working/seed-vc/configs/presets/config_dit_mel_seed_uvit_whisper_base_f0_44k.yml')
# CHECKPOINT1 = Path('/kaggle/input/model_checkpoints2/pytorch/default/1/ar_base.pth')
# CHECKPOINT2=Path('/kaggle/input/model_checkpoints2/pytorch/default/1/cfm_small.pth')
# # CHECKPOINT=Path('/kaggle/input/checkpoint/pytorch/default/1/DiT_seed_v2_uvit_whisper_base_f0_44k_bigvgan_pruned_ema.pth')
# OUTPUT.mkdir(parents=True, exist_ok=True)

# for lang in BASE.iterdir():
#     if not lang.is_dir(): continue
#     lang_out = OUTPUT / lang.name
#     lang_out.mkdir(parents=True, exist_ok=True)

#     refs = list(lang.glob('*.wav'))
#     if not refs: continue
#     ref = refs[0]

#     for src in lang.glob('*.wav'):
#         print(f'Processing {src.name} → {ref.name}')
#         subprocess.run([
#             'python', 'inference_2.py',
#             '--source', str(src),
#             '--target', str(ref),
#             '--output', str(lang_out),
#             '--diffusion-steps', '25',
#             '--length-adjust', '1.0',
#             '--f0-condition', 'True',
#             '--config', str(CONFIG),
#             '--checkpoint', str(CHECKPOINT)
#         ])

In [3]:
from pathlib import Path
import subprocess
import random
import librosa
import soundfile as sf

# Paths
BASE = Path('/kaggle/input/dir-3-songs/Spanish')  # Contains .wav files
OUTPUT = Path('/kaggle/working/reconstructed_multi_lang_es')
OUTPUT.mkdir(parents=True, exist_ok=True)

# Config and model checkpoint
CONFIG = Path('/kaggle/working/seed-vc/configs/presets/config_dit_mel_seed_uvit_whisper_base_f0_44k.yml')
CHECKPOINT = Path('/kaggle/input/checkpoint/pytorch/default/1/DiT_seed_v2_uvit_whisper_base_f0_44k_bigvgan_pruned_ema.pth')

# Sample rate
sr = 44100

# List all .wav files
wav_files = list(BASE.glob('*.wav'))
total_files = len(wav_files)

print(f"🔍 Found {total_files} WAV files for Singfake generation.\n")

# Iterate with progress tracking
for idx, src in enumerate(wav_files, start=1):
    print(f"🔄 [{idx}/{total_files}] Processing: {src.name}")

    try:
        # Load and resample
        audio, _ = librosa.load(str(src), sr=sr)
        duration = len(audio) / sr
        print(f"  🎧 Loaded ({duration:.2f}s, {sr}Hz)")

        # Save resampled audio
        fixed_src = OUTPUT / f"{src.stem}_sr{sr}.wav"
        sf.write(str(fixed_src), audio, sr)
        print(f"  💾 Resampled and saved to: {fixed_src.name}")

        # Generate 2 variations
        for run_idx in range(2):
            semitone_shift = random.uniform(-2.0, 2.0)
            run_out = OUTPUT / f"{src.stem}_run{run_idx}_shift{semitone_shift:.1f}"
            run_out.mkdir(parents=True, exist_ok=True)

            # Run inference
            print(f"    🎤 Running Singfake (Run {run_idx + 1}, Shift {semitone_shift:.1f} semitones)...")
            result = subprocess.run([
                'python', 'inference.py',
                '--source', str(fixed_src),
                '--target', str(fixed_src),
                '--output', str(run_out),
                '--diffusion-steps', '30',
                '--length-adjust', '1.0',
                '--inference-cfg-rate', '0.7',
                '--f0-condition', 'True',
                '--semi-tone-shift', str(int(semitone_shift)),
                '--config', str(CONFIG),
                '--checkpoint', str(CHECKPOINT),
                '--fp16', 'False'
            ], capture_output=True, text=True)

            if result.returncode == 0:
                print(f"    ✅ Singfake saved at: {run_out}")
            else:
                print(f"    ❌ Error in inference for {src.name}: {result.stderr.strip()}")

    except Exception as e:
        print(f"❗ Error processing {src.name}: {e}")

print("\n🎉 All files processed.")


🔍 Found 500 WAV files for Singfake generation.

🔄 [1/500] Processing: es_2102.wav
  🎧 Loaded (2.00s, 44100Hz)
  💾 Resampled and saved to: es_2102_sr44100.wav
    🎤 Running Singfake (Run 1, Shift -1.4 semitones)...
    ✅ Singfake saved at: /kaggle/working/reconstructed_multi_lang_es/es_2102_run0_shift-1.4
    🎤 Running Singfake (Run 2, Shift 1.2 semitones)...
    ✅ Singfake saved at: /kaggle/working/reconstructed_multi_lang_es/es_2102_run1_shift1.2
🔄 [2/500] Processing: es_2063.wav
  🎧 Loaded (2.00s, 44100Hz)
  💾 Resampled and saved to: es_2063_sr44100.wav
    🎤 Running Singfake (Run 1, Shift -0.6 semitones)...
    ✅ Singfake saved at: /kaggle/working/reconstructed_multi_lang_es/es_2063_run0_shift-0.6
    🎤 Running Singfake (Run 2, Shift -0.8 semitones)...
    ✅ Singfake saved at: /kaggle/working/reconstructed_multi_lang_es/es_2063_run1_shift-0.8
🔄 [3/500] Processing: es_2251.wav
  🎧 Loaded (2.00s, 44100Hz)
  💾 Resampled and saved to: es_2251_sr44100.wav
    🎤 Running Singfake (Run 1, S

In [4]:
import os
import shutil

out_folder = '/kaggle/working/reconstructed_multi_lang_es'
zip_file_name = 'singfakes_es.zip'

# Full path to the zip file
zip_file_path = os.path.join('/kaggle/working', zip_file_name)

# Delete existing zip file if it exists
if os.path.exists(zip_file_path):
    os.remove(zip_file_path)

# Create a new zip file
shutil.make_archive(base_name=zip_file_path.replace('.zip', ''), format='zip', root_dir=out_folder)

print(f"Updated zip file created at: {zip_file_path}")

Updated zip file created at: /kaggle/working/singfakes_es.zip
