<a href="https://colab.research.google.com/github/ArkanDash/Advanced-RVC-Inference/blob/master/Advanced-RVC.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Advanced RVC Inference:

<big>For quicker and effortless model downloads.</big>

In [None]:
#@title Check GPU
# Run the NVIDIA system management tool to show which GPU (if any) is attached
# to this runtime before installing anything.
!nvidia-smi

In [None]:
# @title Installation


from rvc.lib.tools.prerequisites_download import prerequisites_download_pipeline


from IPython.display import clear_output
import torch
import tensorflow as tf

print("\n\033[1m=== Environment Verification ===\033[0m")\
print(f"CUDA Available: {torch.cuda.is_available()}")
print(f"CUDA Version: {torch.version.cuda}")
print(f"GPU Device: {torch.cuda.get_device_name(0)}")


url = "https://github.com/kiurobox/python-rvc-cli.git"


!git clone $url
clear_output()

%cd /content/python-rvc-cli


!pip install -r requirements.txt
!pip uninstall torch torchvision torchaudio -y
!pip install torch==2.3.1 torchvision==0.18.1 torchaudio==2.3.1 --upgrade --index-url https://download.pytorch.org/whl/cu121
clear_output()
print("Finished installing requirements!")







if __name__ == "__main__":
    prerequisites_download_pipeline(models=True, exe=True)

In [None]:
#@title Audio separator for inference



import os
import glob
import yt_dlp

def downloader(url):
    """Download the audio of `url` with yt-dlp into the temp folder.

    The best available audio stream is requested and handed to yt-dlp's
    FFmpegExtractAudio post-processor with WAV as the preferred codec.
    Files are written to ``{path}/temp`` (``path`` is a module-level
    variable) and named after the media title.

    Args:
        url: Link passed unchanged to ``YoutubeDL.download``.
    """
    wav_extractor = {
        'key': 'FFmpegExtractAudio',
        'preferredcodec': 'wav',
        'preferredquality': '192',
    }
    # Output template: <path>/temp/<title>.<ext>
    target_template = os.path.join(f'{path}/temp', '%(title)s.%(ext)s')
    options = dict(
        format='bestaudio/best',
        postprocessors=[wav_extractor],
        outtmpl=target_template,
    )

    with yt_dlp.YoutubeDL(options) as session:
        session.download([url])

def checker(url):
    """Tell whether `url` is an HTTP(S) link rather than a local path.

    The scheme must appear at the start of the string; a plain substring test
    would misclassify local paths that merely contain "http"
    (e.g. ``/content/http_files``) and send them to the downloader.

    Args:
        url: Candidate input string.

    Returns:
        True if `url` starts with ``http://`` or ``https://`` (case-insensitive),
        otherwise False. Non-string values yield False.
    """
    if not isinstance(url, str):
        return False
    return url.lower().startswith(("http://", "https://"))

def uvr_cli(audio_input, output_folder, model, output_format, segment_size, overlap, batch_size, override_model_segment_size, use_autocast, extensions):
    found_files = []

    dictmodel = {
        'BS-Roformer-Viperx-1297': 'model_bs_roformer_ep_317_sdr_12.9755.ckpt',
        'BS-Roformer-Viperx-1296': 'model_bs_roformer_ep_368_sdr_12.9628.ckpt',
        'BS-Roformer-Viperx-1053': 'model_bs_roformer_ep_937_sdr_10.5309.ckpt',
        'Mel-Roformer-Viperx-1143': 'model_mel_band_roformer_ep_3005_sdr_11.4360.ckpt',
        'BS-Roformer-De-Reverb': 'deverb_bs_roformer_8_384dim_10depth.ckpt',
        'Mel-Roformer-Crowd-Aufr33-Viperx': 'mel_band_roformer_crowd_aufr33_viperx_sdr_8.7144.ckpt',
        'Mel-Roformer-Denoise-Aufr33': 'denoise_mel_band_roformer_aufr33_sdr_27.9959.ckpt',
        'Mel-Roformer-Denoise-Aufr33-Aggr' : 'denoise_mel_band_roformer_aufr33_aggr_sdr_27.9768.ckpt',
        'Mel-Roformer-Karaoke-Aufr33-Viperx': 'mel_band_roformer_karaoke_aufr33_viperx_sdr_10.1956.ckpt',
        'MelBand Roformer | Vocals by Kimberley Jensen' : 'vocals_mel_band_roformer.ckpt',
        'MelBand Roformer Kim | FT by unwa' : 'mel_band_roformer_kim_ft_unwa.ckpt',
        'MelBand Roformer Kim | Inst V1 by Unwa' : 'melband_roformer_inst_v1.ckpt',
        'MelBand Roformer Kim | Inst V1 (E) by Unwa' : 'melband_roformer_inst_v1e.ckpt',
        'MelBand Roformer Kim | Inst V2 by Unwa' : 'melband_roformer_inst_v2.ckpt',
        'MelBand Roformer Kim | InstVoc Duality V1 by Unwa' : 'melband_roformer_instvoc_duality_v1.ckpt',
        'MelBand Roformer Kim | InstVoc Duality V2 by Unwa' : 'melband_roformer_instvox_duality_v2.ckpt',
        'MelBand Roformer | De-Reverb by anvuew' : 'dereverb_mel_band_roformer_anvuew_sdr_19.1729.ckpt',
        'MelBand Roformer | De-Reverb Less Aggressive by anvuew' : 'dereverb_mel_band_roformer_less_aggressive_anvuew_sdr_18.8050.ckpt',
        'MelBand Roformer | De-Reverb-Echo by Sucial' : 'dereverb-echo_mel_band_roformer_sdr_10.0169.ckpt',
        'MelBand Roformer | De-Reverb-Echo V2 by Sucial' : 'dereverb-echo_mel_band_roformer_sdr_13.4843_v2.ckpt',
        'MelBand Roformer Kim | SYHFT by SYH99999' : 'MelBandRoformerSYHFT.ckpt',
        'MelBand Roformer Kim | SYHFT V2 by SYH99999' : 'MelBandRoformerSYHFTV2.ckpt',
        'MelBand Roformer Kim | SYHFT V2.5 by SYH99999' : 'MelBandRoformerSYHFTV2.5.ckpt',
        'MelBand Roformer Kim | SYHFT V3 by SYH99999' : 'MelBandRoformerSYHFTV3Epsilon.ckpt',
        'MelBand Roformer Kim | Big SYHFT V1 by SYH99999' : 'MelBandRoformerBigSYHFTV1.ckpt',
        'MelBand Roformer Kim | Big Beta 4 FT by unwa' : 'melband_roformer_big_beta4.ckpt',
        'MelBand Roformer Kim | Big Beta 5e FT by unwa' : 'melband_roformer_big_beta5e.ckpt',
        'BS Roformer | Chorus Male-Female by Sucial' : 'model_chorus_bs_roformer_ep_267_sdr_24.1275.ckpt',
        'MelBand Roformer | Aspiration by Sucial' : 'aspiration_mel_band_roformer_sdr_18.9845.ckpt',
        'MelBand Roformer | Aspiration Less Aggressive by Sucial' : 'aspiration_mel_band_roformer_less_aggr_sdr_18.1201.ckpt',
        'MelBand Roformer | Bleed Suppressor V1 by unwa-97chris' : 'mel_band_roformer_bleed_suppressor_v1.ckpt'
    }
    roformer_model = dictmodel[model]

    if checker(audio_input):
        downloader(audio_input)
        audio_input = f"{path}/temp"

    for audio_files in os.listdir(audio_input):
        if audio_files.endswith(extensions):
            found_files.append(audio_files)

    total_files = len(found_files)

    if total_files == 0:
        print("No valid audio files found.")
    else:
        print(f"{total_files} audio files found")

        found_files.sort()

        for audio_files in found_files:
            file_path = os.path.join(audio_input, audio_files)
            prompt = f'audio-separator "{file_path}" --model_filename {roformer_model} --output_dir={output_folder} --output_format={output_format} --mdxc_segment_size={segment_size} --mdxc_overlap={overlap} --mdxc_batch_size={batch_size} --model_file_dir=./models'
            if override_model_segment_size:
                prompt += " --mdxc_override_model_segment_size"
            if use_autocast:
                prompt += " --use_autocast"
            !$prompt

    if audio_input == f"{path}/temp":
        temp_files = glob.glob(f"{path}/temp/*")
        for file in temp_files:
            os.remove(file)

#@markdown Input path for audio files or link:
audio_input = "" #@param {type:"string"}
#@markdown You can paste the link to the video/audio from many sites, check the complete list [here](https://github.com/yt-dlp/yt-dlp/blob/master/supportedsites.md)

#@markdown Output path for audio files:
output_folder = "/content/Vocales" #@param {type:"string"}
#@markdown Select the model:
model = "BS-Roformer-Viperx-1297" #@param ["BS-Roformer-Viperx-1297", "BS-Roformer-Viperx-1296", "BS-Roformer-Viperx-1053", "Mel-Roformer-Viperx-1143", "BS-Roformer-De-Reverb", "Mel-Roformer-Crowd-Aufr33-Viperx", "Mel-Roformer-Denoise-Aufr33", "Mel-Roformer-Denoise-Aufr33-Aggr", "Mel-Roformer-Karaoke-Aufr33-Viperx", "MelBand Roformer | Vocals by Kimberley Jensen", "MelBand Roformer Kim | FT by unwa", "MelBand Roformer Kim | Inst V1 by Unwa", "MelBand Roformer Kim | Inst V1 (E) by Unwa", "MelBand Roformer Kim | Inst V2 by Unwa", "MelBand Roformer Kim | InstVoc Duality V1 by Unwa", "MelBand Roformer Kim | InstVoc Duality V2 by Unwa", "MelBand Roformer | De-Reverb by anvuew", "MelBand Roformer | De-Reverb Less Aggressive by anvuew", "MelBand Roformer | De-Reverb-Echo by Sucial", "MelBand Roformer | De-Reverb-Echo V2 by Sucial", "MelBand Roformer Kim | SYHFT by SYH99999", "MelBand Roformer Kim | SYHFT V2 by SYH99999", "MelBand Roformer Kim | SYHFT V2.5 by SYH99999", "MelBand Roformer Kim | SYHFT V3 by SYH99999", "MelBand Roformer Kim | Big SYHFT V1 by SYH99999", "MelBand Roformer Kim | Big Beta 4 FT by unwa", "MelBand Roformer Kim | Big Beta 5e FT by unwa", "BS Roformer | Chorus Male-Female by Sucial", "MelBand Roformer | Aspiration by Sucial", "MelBand Roformer | Aspiration Less Aggressive by Sucial", "MelBand Roformer | Bleed Suppressor V1 by unwa-97chris"]
#@markdown Select the output format:
output_format = "wav" #@param ["wav", "flac", "mp3", "ogg", "opus", "m4a", "aiff", "ac3"]
#@markdown Larger consumes more resources, but may give better results.
segment_size = 256 #@param {type:"slider", min:32, max:4000, step:32}
#@markdown Amount of overlap between prediction windows.
overlap = 8 #@param {type:"slider", min:2, max:10, step:1}
#@markdown Larger consumes more RAM but may process slightly faster.
batch_size = 1 #@param {type:"slider", min:1, max:16, step:1}
#@markdown Override model default segment size instead of using the model default value.
override_model_segment_size = False #@param {type:"boolean"}
#@markdown Flag to use PyTorch autocast for faster inference. Do not use for CPU inference.
use_autocast = True #@param {type:"boolean"}
# File-name suffixes that uvr_cli treats as audio.
extensions = (".wav", ".flac", ".mp3", ".ogg", ".opus", ".m4a", ".aiff", ".ac3")

# Base directory for the "temp" download folder. `downloader` and `uvr_cli`
# read this global as f"{path}/temp"; without it they fail with NameError.
path = os.getcwd()

uvr_cli(audio_input, output_folder, model, output_format, segment_size, overlap, batch_size, override_model_segment_size, use_autocast, extensions)

In [None]:
# @title Download model
# @markdown Hugging Face or Google Drive
model_link = "https://huggingface.co/Bredvige/Sonic2/resolve/main/Sonic.zip"  # @param {type:"string"}

# Pass the link from the form to the CLI's `download` sub-command.
# `rvc_cli.py` is referenced by a relative path, so this cell depends on the
# working directory set by the installation cell (`%cd /content/python-rvc-cli`).
!python rvc_cli.py download --model_link "{model_link}"

In [None]:
# @title Run Inference

import os

current_dir = os.getcwd()

model_name = ""  # @param {type:"string"}
model_folder = os.path.join(current_dir, f"logs/{model_name}")

if not os.path.exists(model_folder):
    raise FileNotFoundError(f"Model directory not found: {model_folder}")

files_in_folder = os.listdir(model_folder)
pth_path = next((f for f in files_in_folder if f.endswith(".pth")), None)
index_file = next((f for f in files_in_folder if f.endswith(".index")), None)

if pth_path is None or index_file is None:
    raise FileNotFoundError("No model found.")

pth_file = os.path.join(model_folder, pth_path)
index_file = os.path.join(model_folder, index_file)

input_path = ""  # @param {type:"string"}
output_path = "/content/output.wav" # @param {type:"string"}
export_format = "WAV"  # @param ['WAV', 'MP3', 'FLAC', 'OGG', 'M4A'] {allow-input: false}
f0_method = "hybrid[rmvpe+fcpe]"  # @param ["crepe", "crepe-tiny", "rmvpe", "fcpe", "hybrid[rmvpe+fcpe]"] {allow-input: false}
f0_up_key = 0  # @param {type:"slider", min:-24, max:24, step:0}
filter_radius = 3  # @param {type:"slider", min:0, max:10, step:0}
rms_mix_rate = 0.8  # @param {type:"slider", min:0.0, max:1.0, step:0.1}
protect = 0.5  # @param {type:"slider", min:0.0, max:0.5, step:0.1}
index_rate = 0.6  # @param {type:"slider", min:0.0, max:1.0, step:0.1}
hop_length = 128  # @param {type:"slider", min:1, max:512, step:0}
clean_strength = 0.7  # @param {type:"slider", min:0.0, max:1.0, step:0.1}
split_audio = False  # @param{type:"boolean"}
clean_audio = False  # @param{type:"boolean"}
f0_autotune = False  # @param{type:"boolean"}
formant_shift = False # @param{type:"boolean"}
formant_qfrency = 1.0 # @param {type:"slider", min:1.0, max:16.0, step:0.1}
formant_timbre = 1.0 # @param {type:"slider", min:1.0, max:16.0, step:0.1}
embedder_model = "contentvec" # @param ["contentvec", "chinese-hubert-base", "japanese-hubert-base", "korean-hubert-base", "custom"] {allow-input: false}
embedder_model_custom = "" # @param {type:"string"}




!python rvc_cli.py infer --pitch "{f0_up_key}" --filter_radius "{filter_radius}" --volume_envelope "{rms_mix_rate}" --index_rate "{index_rate}" --hop_length "{hop_length}" --protect "{protect}" --f0_autotune "{f0_autotune}" --f0_method "{f0_method}" --input_path "{input_path}" --output_path "{output_path}" --pth_path "{pth_file}" --index_path "{index_file}" --split_audio "{split_audio}" --clean_audio "{clean_audio}" --clean_strength "{clean_strength}" --export_format "{export_format}" --embedder_model "{embedder_model}" --embedder_model_custom "{embedder_model_custom}"

In [None]:
#@title Play your audio output


from IPython.display import Audio, display, clear_output


# Embed an autoplaying audio player for `output_path`, the variable set by the
# "Run Inference" cell (that cell must have been run first).
display(Audio(output_path, autoplay=True))