# Advanced RVC Inference - No UI Version

This notebook provides a streamlined, command-line style interface for RVC inference with enhanced features:
- Better error handling and validation
- Batch processing capabilities
- Audio preview before and after conversion
- Preset configurations
- Model management utilities
- Progress indicators

In [None]:
# Installation and Setup
import os
import sys
import subprocess
from pathlib import Path
import json
import time
import glob
import shutil
import warnings
warnings.filterwarnings('ignore')

print("Setting up Advanced RVC environment...")

# Check if we're in Google Colab.
# `except Exception` (not a bare `except`) keeps the detection best-effort
# without swallowing KeyboardInterrupt / SystemExit.
try:
    import google.colab  # noqa: F401 - imported only to detect Colab
    IN_COLAB = True
    print("Running in Google Colab")
except Exception:
    IN_COLAB = False
    print("Running in local environment")

# Install dependencies if requirements.txt exists.
# `sys.executable -m pip` installs into the interpreter running this kernel;
# a bare `pip` on PATH may belong to a different Python installation.
if os.path.exists("requirements.txt"):
    print("Installing dependencies...")
    subprocess.run(
        [sys.executable, "-m", "pip", "install", "-r", "requirements.txt"],
        check=True,
    )
else:
    print("requirements.txt not found, skipping dependency installation")

# Download prerequisites if the script exists (run with the same interpreter
# so it sees the packages installed above)
if os.path.exists("programs/applio_code/rvc/lib/tools/prerequisites_download.py"):
    print("Downloading prerequisites...")
    subprocess.run(
        [sys.executable, "programs/applio_code/rvc/lib/tools/prerequisites_download.py"],
        check=True,
    )
else:
    print("Prerequisites download script not found, skipping")

# Create necessary directories
os.makedirs("models", exist_ok=True)
os.makedirs("audio_files/input", exist_ok=True)
os.makedirs("audio_files/output", exist_ok=True)
os.makedirs("presets", exist_ok=True)

# Import required modules
_imports_ok = True
try:
    from core import full_inference_program
    import torch
    import numpy as np
    import soundfile as sf
    from IPython.display import Audio, display, HTML
    import matplotlib.pyplot as plt
    from tqdm.notebook import tqdm
    print("All modules imported successfully")
except ImportError as e:
    _imports_ok = False
    print(f"Error importing modules: {e}")
    print("Please ensure all dependencies are installed")

# Check GPU availability.
# The name `torch` is unbound when an earlier import in the block above
# failed, so look the module up instead of referencing it directly.
_torch = sys.modules.get("torch")
if _torch is not None and _torch.cuda.is_available():
    print(f"GPU available: {_torch.cuda.get_device_name(0)}")
    devices = "0"
else:
    print("No GPU available, using CPU")
    devices = "cpu"

if _imports_ok:
    print("Setup completed successfully!")
else:
    print("Setup finished with import errors - fix them before running the cells below")

In [None]:
# Model Management Utilities

def list_available_models():
    """Print and return the file names of the ``.pth`` files in ``models/``.

    Returns:
        A list of file names (no directory part). An empty list is returned,
        after printing a notice, when the directory is missing or contains
        no ``.pth`` files.
    """
    models_dir = "models"
    if not os.path.exists(models_dir):
        print(f"Models directory not found: {models_dir}")
        return []

    pattern = os.path.join(models_dir, "*.pth")
    found = list(map(os.path.basename, glob.glob(pattern)))

    if len(found) == 0:
        print("No .pth model files found in the models directory")
        return []

    # Numbered listing, starting at 1
    print("Available models:")
    number = 1
    for model_file in found:
        print(f"{number}. {model_file}")
        number += 1

    return found

def find_index_file(model_name):
    """Find the index file that belongs to a model in the ``models`` directory.

    Args:
        model_name: Model file name (e.g. ``"voice.pth"``); its extension is
            stripped to obtain the base name to search for.

    Returns:
        The path of the first match, or ``None`` when nothing matches.
        An exact ``<base>.index`` / ``.npy`` / ``.bin`` file is preferred;
        otherwise any file in ``models`` whose name starts with the base name
        and ends with one of those extensions is used (first in sorted order).
    """
    base_name = os.path.splitext(model_name)[0]
    possible_extensions = (".index", ".npy", ".bin")

    for ext in possible_extensions:
        index_path = os.path.join("models", f"{base_name}{ext}")
        if os.path.exists(index_path):
            return index_path

    # Try to find any index file with similar name.
    # Escape the base name so characters such as "[" or "*" in a model name
    # are matched literally instead of being read as glob syntax.
    pattern = os.path.join("models", f"{glob.escape(base_name)}*")
    # Sort so the chosen file does not depend on directory listing order.
    candidates = sorted(
        path for path in glob.glob(pattern) if path.endswith(possible_extensions)
    )

    return candidates[0] if candidates else None

def upload_model():
    """Upload model and index files through the Colab file picker.

    Outside Google Colab this prints a notice and returns. Uploaded ``.pth``
    files and ``.index`` / ``.npy`` / ``.bin`` files are moved into the
    ``models`` directory; any other file is reported and left where it is.
    """
    if not IN_COLAB:
        print("This function is only available in Google Colab")
        return

    from google.colab import files

    index_suffixes = (".index", ".npy", ".bin")
    received = files.upload()

    for filename in received:
        destination = os.path.join("models", filename)

        if filename.endswith(".pth"):
            shutil.move(filename, destination)
            print(f"Model {filename} uploaded to models directory")
            continue

        if filename.endswith(index_suffixes):
            shutil.move(filename, destination)
            print(f"Index file {filename} uploaded to models directory")
            continue

        print(f"Skipping unsupported file: {filename}")

# List available models when this cell runs; the returned file names are kept
# in `available_models` (an empty list when nothing was found)
available_models = list_available_models()

In [None]:
# Preset Configurations

def save_preset(name, config):
    """Save a configuration preset as ``presets/<name>.json``.

    Args:
        name: Preset name, used as the file name without extension.
        config: JSON-serialisable mapping of settings to store.

    Raises:
        TypeError: If ``config`` contains values ``json`` cannot serialise.
        OSError: If the preset file cannot be written.
    """
    preset_dir = "presets"
    # Do not rely on the setup cell having created the directory already.
    os.makedirs(preset_dir, exist_ok=True)
    preset_path = os.path.join(preset_dir, f"{name}.json")
    # Explicit encoding keeps preset files portable across platforms.
    with open(preset_path, 'w', encoding="utf-8") as f:
        json.dump(config, f, indent=2)
    print(f"Preset '{name}' saved to {preset_path}")

def load_preset(name):
    """Read ``presets/<name>.json`` and return its parsed contents.

    Returns:
        The decoded JSON value, or ``None`` (after printing a notice) when
        the preset file does not exist.
    """
    preset_path = os.path.join("presets", f"{name}.json")

    if os.path.exists(preset_path):
        with open(preset_path, 'r') as handle:
            loaded = json.load(handle)
        print(f"Preset '{name}' loaded")
        return loaded

    print(f"Preset '{name}' not found")
    return None

def list_presets():
    """Print and return the names of the presets stored in ``presets/``.

    Returns:
        Preset names (JSON file names without directory or extension), or an
        empty list, after printing a notice, when there are none.
    """
    preset_names = []
    for preset_file in glob.glob(os.path.join("presets", "*.json")):
        stem, _ext = os.path.splitext(os.path.basename(preset_file))
        preset_names.append(stem)

    if len(preset_names) == 0:
        print("No presets found")
        return []

    # Numbered listing, starting at 1
    print("Available presets:")
    number = 1
    for preset in preset_names:
        print(f"{number}. {preset}")
        number += 1

    return preset_names

# Seed the two built-in presets; a preset file that already exists on disk
# is left untouched.
if not os.path.exists("presets/high_quality.json"):
    high_quality_preset = dict(
        pitch=0,
        index_rate=0.8,
        rms_mix_rate=0.25,
        protect=0.33,
        hop_length=128,
        filter_radius=3,
        f0_method="rmvpe",
        split_audio=True,
        autotune=False,
        denoise=True,
        export_format="wav",
    )
    save_preset("high_quality", high_quality_preset)

if not os.path.exists("presets/fast_conversion.json"):
    fast_preset = dict(
        pitch=0,
        index_rate=0.7,
        rms_mix_rate=0.5,
        protect=0.33,
        hop_length=256,
        filter_radius=3,
        f0_method="harvest",
        split_audio=False,
        autotune=False,
        denoise=False,
        export_format="wav",
    )
    save_preset("fast_conversion", fast_preset)

# Show what is available now and keep the names for later cells
available_presets = list_presets()

In [None]:
# Configuration Parameters
# The helper functions defined in later cells (validate_config,
# run_conversion, run_batch_conversion) read these names as module-level
# globals, so re-run this cell after changing a value.

# Model settings
model_path = "models/your_model.pth"  # Path to your RVC model
index_path = "models/your_index.index"  # Path to your index file; a missing file only produces a validation warning
model_name = "contentvec"  # Embedder model name (passed to full_inference_program as embedder_model), not the voice model file

# Audio settings
input_audio_path = "audio_files/input/input.wav"  # Your input audio file
output_path = "audio_files/output/output.wav"  # Output file path for single-file conversion

# Conversion parameters
pitch = 0  # Pitch adjustment in semitones
index_rate = 0.75  # How much to use the index file (0.0-1.0)
rms_mix_rate = 0.25  # RMS mix rate (0.0-1.0)
protect = 0.33  # Protection for voiceless consonants (0.0-1.0)
hop_length = 128  # Hop length for audio processing
filter_radius = 3  # Filter radius
split_audio = True  # Whether to split audio for processing large files
autotune = False  # Apply autotune
f0_method = "rmvpe"  # Method for pitch extraction ('harvest', 'crepe', 'rmvpe', etc.)

# Processing options
vocal_model = "MDX23C"  # Vocal separation model
karaoke_model = "UVR-BVE"  # Karaoke model
dereverb_model = "UVR-Deecho-Dereverb"  # Dereverb model
deecho = False  # Apply deecho
denoise = False  # Apply denoising
reverb = False  # Apply reverb

# Output format
export_format = "wav"  # Output format ('wav', 'mp3', 'flac'); also used as the extension of batch outputs

# Batch processing
batch_mode = False  # Enable batch processing
batch_input_dir = "audio_files/input"  # Directory with input files for batch processing
batch_output_dir = "audio_files/output"  # Directory for batch output

# Advanced options
use_tta = False  # Use Test Time Augmentation
batch_size = 1  # Batch size for processing
delete_audios = False  # Delete intermediate audio files

# Echo the key settings so the cell output records what will be used
print(f"Configuration set. Model: {model_path}, Input: {input_audio_path}")
print(f"Using device: {devices}")
print(f"Batch mode: {batch_mode}")

In [None]:
# Audio Utilities

def list_audio_files(directory):
    """Print and return the audio files found directly inside ``directory``.

    Extensions are matched case-insensitively (``song.WAV`` counts), which
    matches what ``upload_audio`` accepts. Hidden files (names starting with
    ".") and sub-directories are skipped, and the result is sorted so the
    numbering is stable between runs.

    Args:
        directory: Directory to scan (not recursive).

    Returns:
        Sorted list of file paths (``directory`` joined with the file name),
        or an empty list, after printing a notice, when the directory is
        missing or holds no audio files.
    """
    if not os.path.isdir(directory):
        print(f"Directory not found: {directory}")
        return []

    audio_extensions = (".wav", ".mp3", ".flac", ".m4a", ".ogg")
    audio_files = sorted(
        os.path.join(directory, entry)
        for entry in os.listdir(directory)
        if not entry.startswith(".")
        and entry.lower().endswith(audio_extensions)
        and os.path.isfile(os.path.join(directory, entry))
    )

    if not audio_files:
        print(f"No audio files found in {directory}")
        return []

    print(f"Found {len(audio_files)} audio files in {directory}:")
    for i, file in enumerate(audio_files, 1):
        print(f"{i}. {os.path.basename(file)}")

    return audio_files

def upload_audio():
    """Upload audio files through the Colab file picker.

    Outside Google Colab this prints a notice and returns. Uploaded files
    whose lower-cased name ends with a supported audio extension are moved
    into ``audio_files/input``; anything else is reported and skipped.
    """
    if not IN_COLAB:
        print("This function is only available in Google Colab")
        return

    from google.colab import files

    received = files.upload()
    supported = (".wav", ".mp3", ".flac", ".m4a", ".ogg")

    for filename in received:
        if not filename.lower().endswith(supported):
            print(f"Skipping unsupported file: {filename}")
            continue

        shutil.move(filename, os.path.join("audio_files/input", filename))
        print(f"Audio file {filename} uploaded to audio_files/input directory")

def preview_audio(audio_path, title="Audio Preview"):
    """Plot the waveform of an audio file, show a player and print its stats.

    Args:
        audio_path: Path of the audio file to preview.
        title: Prefix used for the waveform plot title.

    Prints a notice and returns when the file does not exist. If reading or
    plotting fails, the error is printed and only the audio player is shown.
    """
    if not os.path.exists(audio_path):
        print(f"Audio file not found: {audio_path}")
        return

    try:
        samples, rate = sf.read(audio_path)
        sample_count = len(samples)

        # Waveform plot, x axis in seconds
        plt.figure(figsize=(12, 3))
        duration = sample_count / rate
        seconds = np.linspace(0, duration, num=sample_count)
        plt.plot(seconds, samples)
        plt.title(f"{title} - Waveform")
        plt.xlabel("Time (s)")
        plt.ylabel("Amplitude")
        plt.grid(True)
        plt.tight_layout()
        plt.show()

        # Audio player
        display(Audio(audio_path, autoplay=False))

        # Summary: a 1-D sample array is reported as a single channel
        if samples.ndim == 1:
            channels = 1
        else:
            channels = samples.shape[1]
        print(f"Audio Info:")
        print(f"- Duration: {duration:.2f} seconds")
        print(f"- Sample Rate: {rate} Hz")
        print(f"- Channels: {channels}")
    except Exception as e:
        print(f"Error previewing audio: {e}")
        # Fall back to the bare player when loading or plotting failed
        try:
            display(Audio(audio_path, autoplay=False))
        except Exception as e2:
            print(f"Could not display audio player: {e2}")

def convert_audio_format(input_path, output_path, target_format="wav"):
    """Re-encode an audio file by reading it and writing it back with soundfile.

    Args:
        input_path: File to read.
        output_path: File to write.
        target_format: Value passed as ``format`` to ``sf.write``.

    Returns:
        ``True`` on success; ``False`` (after printing the error) on failure.
    """
    try:
        samples, rate = sf.read(input_path)
        sf.write(output_path, samples, rate, format=target_format)
    except Exception as e:
        print(f"Error converting audio: {e}")
        return False
    else:
        print(f"Audio converted to {target_format}: {output_path}")
        return True

# List the audio files currently in audio_files/input when this cell runs;
# the returned paths are kept in `input_audio_files` (empty list when none)
input_audio_files = list_audio_files("audio_files/input")

In [None]:
# Helper Functions

def validate_config():
    """Check the module-level configuration values before a conversion.

    Problems are split into errors (missing model or input, rates outside
    0-1) and warnings (missing index file, unrecognised f0 method or export
    format).

    Returns:
        ``False`` if any error was found (errors are printed); otherwise
        ``True``, after printing any warnings.
    """
    problems = []
    notices = []

    # Model file is mandatory
    if not os.path.exists(model_path):
        problems.append(f"Model file not found: {model_path}")

    # Index file is optional, so a missing one is only a warning
    if index_path and not os.path.exists(index_path):
        notices.append(f"Index file not found: {index_path}")

    # Single-file mode needs the input file, batch mode needs the directory
    if batch_mode:
        if not os.path.exists(batch_input_dir):
            problems.append(f"Batch input directory not found: {batch_input_dir}")
    else:
        if not os.path.exists(input_audio_path):
            problems.append(f"Input audio file not found: {input_audio_path}")

    # Rates must lie in the closed interval [0, 1]
    for label, value in (
        ("index_rate", index_rate),
        ("rms_mix_rate", rms_mix_rate),
        ("protect", protect),
    ):
        if not 0 <= value <= 1:
            problems.append(f"{label} must be between 0 and 1, got {value}")

    # Unknown pitch-extraction methods are only warned about
    valid_f0_methods = ["harvest", "crepe", "rmvpe", "dio", "mangio-crepe", "mangio-dio"]
    if f0_method not in valid_f0_methods:
        notices.append(f"f0_method '{f0_method}' may not be supported. Valid options: {valid_f0_methods}")

    # Same for the export format
    valid_formats = ["wav", "mp3", "flac"]
    if export_format not in valid_formats:
        notices.append(f"export_format '{export_format}' may not be supported. Valid options: {valid_formats}")

    # Errors stop validation; warnings are reported but do not fail it
    if problems:
        print("❌ Configuration errors found:")
        for problem in problems:
            print(f"  - {problem}")
        return False

    if notices:
        print("⚠️ Configuration warnings:")
        for notice in notices:
            print(f"  - {notice}")

    print("✅ Configuration validation passed")
    return True

def run_conversion(input_file=None, output_file=None, progress_callback=None):
    """Run one RVC conversion using the module-level configuration.

    Args:
        input_file: Audio file to convert; falls back to the module-level
            ``input_audio_path`` when empty or ``None``.
        output_file: Destination path; falls back to the module-level
            ``output_path`` when empty or ``None``.
        progress_callback: Optional callable ``(fraction, message)`` invoked
            just before the conversion starts (0.1) and after it ends (1.0).

    Returns:
        The second element of the value returned by ``full_inference_program``
        (printed as the output file), or ``None`` when the input or model
        file is missing or the conversion raises.
    """
    # Use provided files or defaults.
    # The local names must differ from the module-level `output_path`:
    # assigning to `output_path` here would make it local to the function and
    # raise UnboundLocalError whenever the default has to be read.
    source_path = input_file if input_file else input_audio_path
    target_path = output_file if output_file else output_path

    # Check if files exist
    if not os.path.exists(source_path):
        print(f"Input file not found: {source_path}")
        return None

    if not os.path.exists(model_path):
        print(f"Model file not found: {model_path}")
        return None

    # Create output directory if it doesn't exist; a bare file name has no
    # directory part and os.makedirs("") would raise
    target_dir = os.path.dirname(target_path)
    if target_dir:
        os.makedirs(target_dir, exist_ok=True)

    print(f"Starting RVC conversion...")
    print(f"Input: {source_path}")
    print(f"Output: {target_path}")
    print(f"Model: {model_path}")
    print(f"Pitch: {pitch}")
    print(f"F0 Method: {f0_method}")

    start_time = time.time()

    try:
        if progress_callback:
            progress_callback(0.1, "Initializing conversion...")

        result = full_inference_program(
            model_path=model_path,
            index_path=index_path,
            input_audio_path=source_path,
            output_path=target_path,
            export_format_rvc=export_format,
            split_audio=split_audio,
            autotune=autotune,
            vocal_model=vocal_model,
            karaoke_model=karaoke_model,
            dereverb_model=dereverb_model,
            deecho=deecho,
            deecho_model="UVR-Deecho-Normal",
            denoise=denoise,
            denoise_model="UVR Denoise",
            reverb=reverb,
            vocals_volume=0.0,
            instrumentals_volume=0.0,
            backing_vocals_volume=0.0,
            export_format_final=export_format,
            devices=devices,
            pitch=pitch,
            filter_radius=filter_radius,
            index_rate=index_rate,
            rms_mix_rate=rms_mix_rate,
            protect=protect,
            pitch_extract=f0_method,
            # NOTE(review): keyword spelled "hop_lenght" as in the original
            # call - presumably it matches the callee's signature; confirm
            hop_lenght=hop_length,
            reverb_room_size=0.15,
            reverb_damping=0.7,
            reverb_wet_gain=0.1,
            reverb_dry_gain=0.8,
            reverb_width=1.0,
            embedder_model=model_name,
            delete_audios=delete_audios,
            use_tta=use_tta,
            batch_size=batch_size,
            infer_backing_vocals=False,
            infer_backing_vocals_model="",
            infer_backing_vocals_index="",
            change_inst_pitch=0,
            pitch_back=0,
            filter_radius_back=3,
            index_rate_back=0.75,
            rms_mix_rate_back=0.25,
            protect_back=0.33,
            pitch_extract_back="harvest",
            hop_length_back=128,
            export_format_rvc_back=export_format,
            split_audio_back=False,
            autotune_back=False,
            embedder_model_back=model_name,
        )

        if progress_callback:
            progress_callback(1.0, "Conversion completed!")

        elapsed_time = time.time() - start_time
        print(f"Conversion completed in {elapsed_time:.2f} seconds")
        # assumes result[1] is the path of the written file - TODO confirm
        print(f"Output file: {result[1]}")
        return result[1]
    except Exception as e:
        print(f"Conversion failed: {e}")
        import traceback
        traceback.print_exc()
        return None

def run_batch_conversion():
    """Convert every audio file found in ``batch_input_dir``.

    Does nothing when the configuration fails validation or no audio files
    are found. Each output is written to ``batch_output_dir`` as
    ``<input stem>_converted.<export_format>``; a success/failure tally is
    printed at the end.
    """
    if not validate_config():
        return

    input_files = list_audio_files(batch_input_dir)
    if not input_files:
        return

    os.makedirs(batch_output_dir, exist_ok=True)

    # One boolean per processed file: True when run_conversion returned a
    # truthy result
    outcomes = []

    # Create progress bar
    progress = tqdm(input_files, desc="Processing files")

    for source in progress:
        filename = os.path.basename(source)
        stem, _ext = os.path.splitext(filename)
        target = os.path.join(batch_output_dir, f"{stem}_converted.{export_format}")

        progress.set_description(f"Processing {filename}")

        outcomes.append(bool(run_conversion(source, target)))

    success_count = sum(outcomes)
    fail_count = len(outcomes) - success_count
    print(f"\nBatch conversion completed: {success_count} successful, {fail_count} failed")

# Validate the configuration when this cell runs; the single-file branch of
# the conversion cell checks `is_valid` before converting
is_valid = validate_config()

In [None]:
# Preview Input Audio
# Only single-file mode has one specific input to preview.

if batch_mode:
    print("Batch mode enabled - skipping individual audio preview")
elif os.path.exists(input_audio_path):
    print("Input Audio Preview:")
    preview_audio(input_audio_path, "Input Audio")
else:
    print(f"Input audio file not found: {input_audio_path}")
    print("Please update the input_audio_path in the configuration cell")

In [None]:
# Run Conversion

# Progress reporter handed to run_conversion
def progress_callback(progress, message):
    """Print ``message`` prefixed with ``progress`` (a 0-1 fraction) as a whole percent."""
    percent = progress * 100
    print(f"[{percent:.0f}%] {message}")

if batch_mode:
    # Batch conversion validates the configuration itself
    run_batch_conversion()
elif not is_valid:
    print("\n❌ Configuration validation failed. Please fix the errors before running conversion.")
else:
    # Single file conversion
    output_file = run_conversion(progress_callback=progress_callback)

    if not output_file:
        print("\n❌ Conversion failed. Please check the configuration and try again.")
    else:
        print(f"\n✅ Conversion successful! Output saved to: {output_file}")

        # Preview the result
        print("\nOutput Audio Preview:")
        preview_audio(output_file, "Output Audio")

## Usage Instructions

### Basic Usage
1. Update the configuration parameters in the 'configuration' cell:
   - Set your model_path to the RVC model file
   - Set your input_audio_path to the audio file you want to convert
   - Adjust conversion parameters as needed

2. Run all cells in order

3. Check the output file after conversion completes

### Batch Processing
To process multiple files:
1. Set batch_mode = True in the configuration
2. Set batch_input_dir to the directory containing your input files
3. Set batch_output_dir to where you want the outputs saved
4. Run all cells

### Using Presets
1. Load a preset: `config = load_preset("preset_name")`
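2. Apply the preset to your configuration: the preset keys match the configuration variable names, so run `globals().update(config)` in a cell after the configuration cell (or assign individual values, e.g. `pitch = config["pitch"]`), then re-run the validation cell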
3. Save your own presets: `save_preset("name", your_config_dict)`

### Model Management
- List available models: `list_available_models()`
- Upload a model (Colab only): `upload_model()`
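- Find index file for a model: `find_index_file("your_model.pth")` (pass the model's file name, not the `model_name` embedder setting from the configuration cell)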

### Audio Utilities
- List audio files: `list_audio_files("directory_path")`
- Upload audio files (Colab only): `upload_audio()`
- Preview audio: `preview_audio("file_path", "Title")`
- Convert audio format: `convert_audio_format(input, output, format)`

## Parameters Explanation

### Core Parameters
- `pitch`: Pitch adjustment in semitones (positive = higher, negative = lower)
- `index_rate`: How much to use the index file (0.0-1.0, higher = more faithful to model)
- `f0_method`: Method for pitch extraction ('harvest', 'crepe', 'rmvpe', etc.)
  - 'rmvpe': Best quality, slower
  - 'harvest': Good balance
  - 'crepe': Good for high-pitched voices
- `protect`: Protection for voiceless consonants (0.0-1.0, higher = more protection)

### Advanced Parameters
- `hop_length`: Hop length for audio processing (lower = better quality, slower)
- `filter_radius`: Filter radius for audio processing
- `rms_mix_rate`: RMS mix rate (0.0-1.0)
- `split_audio`: Whether to split audio for processing large files
- `autotune`: Apply autotune to the output

### Processing Options
- `denoise`: Apply denoising
- `deecho`: Apply deecho
- `reverb`: Apply reverb
- `use_tta`: Use Test Time Augmentation (better quality, slower)

## Troubleshooting

### Common Issues
1. **Model not found**: Make sure the model_path is correct and the file exists
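2. **Index file not found**: Make sure index_path points to an existing file (a missing index only produces a warning). `find_index_file()` looks in the models directory for a file named after the model with a .index, .npy or .bin extension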
3. **Conversion fails**: Try reducing hop_length or changing f0_method
4. **Low quality output**: Try increasing index_rate or using a different f0_method
5. **Out of memory errors**: Try reducing batch_size or using CPU instead of GPU

### Performance Tips
- For faster conversion: Use 'harvest' f0_method, increase hop_length, disable split_audio
- For better quality: Use 'rmvpe' f0_method, decrease hop_length, enable use_tta
- For large files: Enable split_audio

## Note
This is a command-line style notebook with minimal UI elements. For more advanced features, use Advanced-RVC.ipynb