<a href="https://colab.research.google.com/github/MLo7Ghinsan/MLo7-colab-notebook/blob/main/RVC_notebook_mlo7.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

#**Retrieval-based-Voice-Conversion (RVC) NOTEBOOK**

Notebook made and maintained by MLo7. If you run into an error or bug in any part of it, please report it to @MLo7#6969 on Discord.

Notebook updated on: 4/25/2023

Update log:
+ notebook cleanup
+ public release (usable)

Coming soon:
+ inference section

# + === + Setup section + === +

In [None]:
#@title # Setup
#@markdown ###[Install any dependencies and necessary models]
from IPython.display import clear_output, display
from google.colab import drive
drive.mount("/content/drive")

!apt-get -y install build-essential python3-dev ffmpeg
!pip install --upgrade setuptools wheel
!pip install --upgrade pip
!apt -y install -qq aria2
!git clone https://github.com/RVC-Project/Retrieval-based-Voice-Conversion-WebUI
%cd /content/Retrieval-based-Voice-Conversion-WebUI
!mkdir -p pretrained uvr5_weights
!pip install tqdm
!pip install -r requirements.txt
!aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/pretrained/D32k.pth -d /content/Retrieval-based-Voice-Conversion-WebUI/pretrained -o D32k.pth
!aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/pretrained/D40k.pth -d /content/Retrieval-based-Voice-Conversion-WebUI/pretrained -o D40k.pth
!aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/pretrained/D48k.pth -d /content/Retrieval-based-Voice-Conversion-WebUI/pretrained -o D48k.pth
!aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/pretrained/G32k.pth -d /content/Retrieval-based-Voice-Conversion-WebUI/pretrained -o G32k.pth
!aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/pretrained/G40k.pth -d /content/Retrieval-based-Voice-Conversion-WebUI/pretrained -o G40k.pth
!aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/pretrained/G48k.pth -d /content/Retrieval-based-Voice-Conversion-WebUI/pretrained -o G48k.pth
!aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/pretrained/f0D32k.pth -d /content/Retrieval-based-Voice-Conversion-WebUI/pretrained -o f0D32k.pth
!aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/pretrained/f0D40k.pth -d /content/Retrieval-based-Voice-Conversion-WebUI/pretrained -o f0D40k.pth
!aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/pretrained/f0D48k.pth -d /content/Retrieval-based-Voice-Conversion-WebUI/pretrained -o f0D48k.pth
!aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/pretrained/f0G32k.pth -d /content/Retrieval-based-Voice-Conversion-WebUI/pretrained -o f0G32k.pth
!aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/pretrained/f0G40k.pth -d /content/Retrieval-based-Voice-Conversion-WebUI/pretrained -o f0G40k.pth
!aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/pretrained/f0G48k.pth -d /content/Retrieval-based-Voice-Conversion-WebUI/pretrained -o f0G48k.pth
!aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/uvr5_weights/HP2-人声vocals+非人声instrumentals.pth -d /content/Retrieval-based-Voice-Conversion-WebUI/uvr5_weights -o HP2-人声vocals+非人声instrumentals.pth
!aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/uvr5_weights/HP5-主旋律人声vocals+其他instrumentals.pth -d /content/Retrieval-based-Voice-Conversion-WebUI/uvr5_weights -o HP5-主旋律人声vocals+其他instrumentals.pth
!aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/hubert_base.pt -d /content/Retrieval-based-Voice-Conversion-WebUI -o hubert_base.pt

clear_output()

print("|")
print("|")
print("|")
print("Setup complete!")

# + === + Training section + === +

In [None]:
#@title #Extract dataset

import zipfile
import os
from tqdm import tqdm

#@markdown ###[Path to the zip file containing your wav data]
raw_data_zip_path = "/content/drive/MyDrive/RVC_data.zip"  #@param {type:"string"}

#@markdown Path to extract audio files (it doesn't really matter where; keeping it under content to save space)

# Directory the WAV files are extracted into; the Preprocess cell reads it as `ext_dir`.
ext_dir = "/content/dataset"  #@param {type:"string"}

# Make sure the target exists even when the archive turns out to hold no WAVs,
# so the directory listing below cannot fail on a missing folder.
os.makedirs(ext_dir, exist_ok=True)

# Extract only the WAV members of the archive (extension matched case-insensitively).
# NOTE(review): ZipFile.extract() keeps each member's folder path, so WAVs stored
# inside sub-folders of the zip end up in sub-folders of `ext_dir` and are NOT
# picked up by the flat os.listdir() renaming pass below.
with zipfile.ZipFile(raw_data_zip_path, "r") as raw_data_zip:
  wav_members = [m for m in raw_data_zip.namelist() if m.lower().endswith(".wav")]
  for wav_member in tqdm(wav_members):
    raw_data_zip.extract(wav_member, path=ext_dir)

# Renumber the extracted files to 00001.wav, 00002.wav, ... so that arbitrary
# user file names cannot cause trouble downstream. The same case-insensitive
# filter as above is used so that e.g. "TAKE1.WAV" is renamed as well.
wav_files = sorted(f for f in os.listdir(ext_dir) if f.lower().endswith(".wav"))
if not wav_files:
  print(f"Warning: no .wav files found directly inside {ext_dir}")

# Two-phase rename: first move every file to a unique temporary name, then to
# its final zero-padded name. Renaming directly could silently overwrite a file
# that already carries a numeric name (e.g. when this cell is run twice).
staged_renames = []
for i, wav_file in enumerate(wav_files, start=1):
  temp_path = os.path.join(ext_dir, f".renaming_{i:05d}.tmp")
  final_path = os.path.join(ext_dir, f"{i:05d}.wav")
  os.rename(os.path.join(ext_dir, wav_file), temp_path)
  staged_renames.append((temp_path, final_path))
for temp_path, final_path in staged_renames:
  os.rename(temp_path, final_path)



In [None]:
#@title #Preprocess
#@markdown ###[skippable if you are resume training from checkpoint]
import warnings
import os

model_name = "name-of-your-model"  #@param {type:"string"}
save_directory = "path-to-your-save-folder" #@param {type:"string"}
logs_dir = save_directory + "/" + model_name
sample_rate = "48kHz (48,000)" #@param ["32kHz (32,000)", "40kHz (40,000)", "48kHz (48,000)"]
if sample_rate == "24kHz (24,000)":
  rate = 32000
elif sample_rate == "44kHz (44,100)":
  rate = 40000
else:
  rate = 48000
pitch_detector = "harvest" #@param ["parselmouth", "harvest", "dio"]
if pitch_detector == "parselmouth":
  pd = "pm"
elif pitch_detector == "harvest":
  pd = "harvest"
else:
  pd = "dio"
thread_count = 8 #@param {type:"slider", min:0, max:24, step:1}
clear_output_messages = True #@param {type:"boolean"}
if not os.path.exists(logs_dir):
    os.makedirs(logs_dir)
warnings.filterwarnings("ignore")
print("Running trainset_preprocess_pipeline_print.py")
!python -u trainset_preprocess_pipeline_print.py {ext_dir} {rate} {thread_count} {logs_dir} True
print(".")
print(".")
print(".")
print("complete")
print("|")
print("|")
print("|")
print("Running extract_f0_print.py")
warnings.filterwarnings("ignore")
!python -u extract_f0_print.py {logs_dir} {thread_count} {pitch_detector}
print(".")
print(".")
print(".")
print("complete")
print("|")
print("|")
print("|")
print("Running extract_feature_print.py")
warnings.filterwarnings("ignore")
!python -u extract_feature_print.py cpu 1 0 0 {logs_dir}
print(".")
print(".")
print(".")
print("complete")
if clear_output_messages:
  clear_output()
else:
  pass
print("============================================")
print("|")
print("|")
print("|")
print("Preprocessing | finished")


In [None]:
#@title #Training

import os
import numpy as np
import faiss

#@markdown ###[Adjustable parameters]:
model_name = "name-of-your-model"  #@param {type:"string"}
save_directory = "path-to-your-save-folder" #@param {type:"string"}
logs_dir = save_directory + "/" + model_name
sample_rate = "48kHz (48,000)" #@param ["32kHz (32,000)", "40kHz (40,000)", "48kHz (48,000)"]
if sample_rate == "24kHz (24,000)":
  rate = "32k"
elif sample_rate == "44kHz (44,100)":
  rate = "40k"
else:
  rate = "48k"
batch_size = 32 #@param {type:"slider", min:0, max:150, step:2}
finish_epoch = 1200 #@param {type:"slider", min:0, max:10000, step:100}
save_epoch_interval = 100 #@param {type:"slider", min:0, max:1000, step:10}

#@markdown ####Notes:
#@markdown - 1) Keep save_epoch_interval value lower than finish_epoch if you want to save G and D checkpoints
#@markdown - 2) Finish epoch is the MAX epoch that your model will train to for making the final model
#@markdown ___

#@markdown ###[Mainly keep default]:

use_gpu = "0"  #@param {type:"string"}
cache_data = 1  #@param {type:"integer"}
only_latest = 0  #@param {type:"integer"}

%cd /content/Retrieval-based-Voice-Conversion-WebUI
#%load_ext tensorboard
#%tensorboard --logdir {logs_dir}
exp_dir = logs_dir
os.makedirs(exp_dir, exist_ok=True)

old_pt = logs_dir + "/0_gt_wavs"
!rm -rf {old_pt}/*.pt

gt_wavs_dir = f"{exp_dir}/0_gt_wavs"
co256_dir = f"{exp_dir}/3_feature256"
f0_dir = f"{exp_dir}/2a_f0"
f0nsf_dir = f"{exp_dir}/2b-f0nsf"
dir_list = [gt_wavs_dir, co256_dir, f0_dir, f0nsf_dir]

file_names = []
for file in os.listdir(gt_wavs_dir):
    if os.path.isfile(os.path.join(gt_wavs_dir, file)):
        file_name = os.path.splitext(file)[0]
        file_names.append(file_name)
opt = []

for name in file_names:
    args = [
        f"{gt_wavs_dir}/{name}.wav",
        f"{co256_dir}/{name}.npy",
        f"{f0_dir}/{name}.wav.npy",
        f"{f0nsf_dir}/{name}.wav.npy",
        "0"
    ]
    opt.append("|".join(args))

mute_args = [
    f"/content/Retrieval-based-Voice-Conversion-WebUI/logs/mute/0_gt_wavs/mute{rate}.wav",
    "/content/Retrieval-based-Voice-Conversion-WebUI/logs/mute/3_feature256/mute.npy",
    "/content/Retrieval-based-Voice-Conversion-WebUI/logs/mute/2a_f0/mute.wav.npy",
    "/content/Retrieval-based-Voice-Conversion-WebUI/logs/mute/2b-f0nsf/mute.wav.npy",
    "0"
]
opt.append("|".join(mute_args))

with open(f"{exp_dir}/filelist.txt", "w") as f:
    f.write("\n".join(opt))
with open(f"{exp_dir}/filelist.txt", "r") as file:
    repl = file.read()
    repl = repl.replace(".wav.wav", ".wav")
with open(f"{exp_dir}/filelist.txt", "w") as file:
    file.write(repl)

search_string = "torch.save(opt,"
target_line_number = None
with open("/content/Retrieval-based-Voice-Conversion-WebUI/train/process_ckpt.py", "r") as f:
  lines = f.readlines()
for i, line in enumerate(lines):
  if search_string in line:
    target_line_number = i
    break
if target_line_number is None:
  print(f"Error: could not find target string '{search_string}' in utils.py")
else:
  new_line = f'        torch.save(opt, "%s/{model_name}.pth" % name)\n'
  lines[target_line_number] = new_line
  with open("/content/Retrieval-based-Voice-Conversion-WebUI/train/process_ckpt.py", "w") as f:
    f.writelines(lines)

warnings.filterwarnings("ignore")
!python train_nsf_sim_cache_sid_load_pretrain.py -e {logs_dir} -sr {rate} -f0 1 -bs {batch_size} -g {use_gpu} -te {finish_epoch} -se {save_epoch_interval} -pg pretrained/f0G{rate}.pth -pd pretrained/f0D{rate}.pth -l {only_latest} -c {cache_data} 2> /dev/null

search_string2 = "inp_root ="
target_line_number2 = None
with open("/content/Retrieval-based-Voice-Conversion-WebUI/infer/train-index.py", "r") as f:
  lines2 = f.readlines()
for i, line in enumerate(lines2):
  if search_string2 in line:
    target_line_number2 = i
    break
if target_line_number2 is None:
  print(f"Error: could not find target string '{search_string2}' in utils.py")
else:
  new_line2 = f'inp_root = "{logs_dir}/3_feature256"\n'
  lines2[target_line_number2] = new_line2
  with open("/content/Retrieval-based-Voice-Conversion-WebUI/infer/train-index.py", "w") as f:
    f.writelines(lines2)
!python infer/train-index.py 2> /dev/null
!mv /content/Retrieval-based-Voice-Conversion-WebUI/infer/added_IVF512_Flat_mi_baseline_src_feat.index {logs_dir}/{model_name}_added_IVF512.index
!mv /content/Retrieval-based-Voice-Conversion-WebUI/infer/big_src_feature_mi.npy {logs_dir}/{model_name}_feature.npy
!mv /content/Retrieval-based-Voice-Conversion-WebUI/infer/trained_IVF512_Flat_mi_baseline_src_feat.index {logs_dir}/{model_name}_trained_IVF512.index
print("|")
print("|")
print("|")
print("Training complete!")



# + === + Inference section + === +

In [None]:
#@title # coming soon, meanwhile, just use the web-ui
# Placeholder for the future inference section: starts the repo's `infer-web.py`.
# NOTE(review): `infer-web.py` is a relative path, so this assumes the working
# directory is still the cloned repo (set via `%cd` in the Setup and Training
# cells) — confirm before running this cell on its own.
!python infer-web.py --colab --pycmd python3