<a href="https://colab.research.google.com/github/MLo7Ghinsan/MLo7-colab-notebook/blob/main/so_vits_svc_notebook_mlo7.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

#**SO-VITS-SVC NOTEBOOK**

You must read and acknowledge the **Terms of Use** in order to understand the guidelines.

Notebook made and maintained by MLo7.
If you find an error or bug in any part of the notebook, please report it to @MLo7#6969 via Discord.

_Notebook updated on: 3/29/2023_

Update log:
+ fixed typo in inference

# **Training Section**

In [None]:
import os
from IPython.display import clear_output
from google.colab import drive
from IPython.display import Audio, display, HTML

drive.mount("/content/drive", force_remount=True)

if not os.path.exists("/content/play_sound"):
    os.makedirs("/content/play_sound")
%cd /content/play_sound
!wget -O setup_complete.wav https://github.com/MLo7Ghinsan/MLo7_Diff-SVC_models/releases/download/audio/setup_complete.wav

#@title #1.0 | SETUP | Install Dependencies and Mount Google Drive

%cd /content
!rm -rf /content/sample_data
!apt-get update
!apt-get install aria2
!git clone https://github.com/svc-develop-team/so-vits-svc -b 4.0
%cd /content/so-vits-svc
!pip install --upgrade pip setuptools numba numpy
!pip install pyworld praat-parselmouth fairseq tensorboardX numba
!aria2c -x 16 -s 16 -j 16 --dir="/content/so-vits-svc/hubert" https://github.com/MLo7Ghinsan/MLo7_Diff-SVC_models/releases/download/diff-svc-necessary-checkpoints/checkpoint_best_legacy_500.pt

clear_output()

print("setup complete!")
print("|")
print("|")
print("|")

chika_dance = '<img src="https://cdn.discordapp.com/attachments/816517150175920138/1090112497446563950/icegif-2013.gif"/>'
display(HTML(chika_dance))

with open("/content/play_sound/setup_complete.wav", "rb") as f:
    setup_complete_sound = f.read()
Audio(data=setup_complete_sound, autoplay=True)

In [None]:
import zipfile
from tqdm import tqdm
#@title #2.0 | Extract data | Resume training from checkpoint
%cd /content
clear_output()
#@markdown ___
#@markdown ###Train from scratch section
#@markdown +=========================+
#@markdown ####Directory of the zip file that contain all of your recordings that you want to use to train a model
train_from_scratch = False #@param {type:"boolean"}
raw_data_zip_path = "path-to-the-zip-file-of-your-recordings"  #@param {type:"string"}
model_name = "your-model-name" #@param {type:"string"}

#@markdown ___
#@markdown ###Resume training section
#@markdown +=======================+
resume_training = False #@param {type:"boolean"}
#@markdown Directory of the zip file that THIS NOTEBOOK saved, or any zip that is in the same structure
preprocessed_data_zip_path = "path-to-saved-data-zip" #@param {type:"string"}
#@markdown ___


if train_from_scratch:
  if not os.path.exists(f"/content/so-vits-svc/dataset_raw/{model_name}"):
    os.makedirs(f"/content/so-vits-svc/dataset_raw/{model_name}")
  with zipfile.ZipFile(raw_data_zip_path, "r") as zip_ref:
    wav_files = [f for f in zip_ref.namelist() if f.endswith('.wav')]
    for file in tqdm(iterable=wav_files, total=len(wav_files), desc="Extracting files", unit="files"):
      zip_ref.extract(member=file, path=f"/content/so-vits-svc/dataset_raw/{model_name}")
  print("Training option: train a model from scratch")
else:
  pass

if resume_training:
  with zipfile.ZipFile(preprocessed_data_zip_path, "r") as zip_ref:
    for file in tqdm(iterable=zip_ref.namelist(), total=len(zip_ref.namelist()), desc="Extracting files", unit="files"):
      zip_ref.extract(member=file, path="/content/so-vits-svc")
  print(" Training option: resume training from preprocessed data")
else:
  pass

print("|")
print("|")
print("|")
print("Done!")

In [None]:
#@title #2.1 Start Preprocessing

#@markdown Run this cell either way, even if you already preprocessed your data

import zipfile
import os
from tqdm import tqdm

%cd /content/so-vits-svc

if train_from_scratch:
  !python resample.py
  clear_output()
  !python preprocess_flist_config.py
  clear_output()
  !python preprocess_hubert_f0.py
  clear_output()

  sovits_data_dir = "/content/drive/MyDrive/so-vits_colab_files"

  if not os.path.exists(sovits_data_dir):
    os.makedirs(sovits_data_dir)
  else:
    pass

  configs_folder = "/content/so-vits-svc/configs"
  flists_folder = "/content/so-vits-svc/filelists"
  dataset_folder = "/content/so-vits-svc/dataset"

  with zipfile.ZipFile(sovits_data_dir + "/" + model_name + "_preprocessed_data.zip", "w", zipfile.ZIP_DEFLATED) as zip_file:
    #config
    for folder_name, subfolders, filenames in tqdm(os.walk(configs_folder), desc="Zipping configs folder"):
      for filename in filenames:
        file_path = os.path.join(folder_name, filename)
        zip_file.write(file_path, os.path.relpath(file_path, configs_folder))
    
    #list files
    for folder_name, subfolders, filenames in tqdm(os.walk(flists_folder), desc="Zipping filelists folder"):
      for filename in filenames:
        file_path = os.path.join(folder_name, filename)
        zip_file.write(file_path, os.path.relpath(file_path, flists_folder))
    
    #dataset
    for folder_name, subfolders, filenames in tqdm(os.walk(dataset_folder), desc="Zipping dataset folder"):
      for filename in filenames:
        file_path = os.path.join(folder_name, filename)
        zip_file.write(file_path, os.path.relpath(file_path, dataset_folder))

  print("\n")
  print(f"Necessary data folders zipped and saved to {sovits_data_dir}!")
else:
  pass

if resume_training:
  print("You already have the preprocessed files!")
else:
  pass

print("|")
print("|")
print("|")
print("Check the configs folder and dataset folder inside so-vits-svc to make sure you are good to go before start training")
print(".... Or you can just ignore this message")

In [None]:
import os
#@title #3.0 Start training
#@markdown ####Use custom save directory
#@markdown This is recommended so you won't lose your progress if colab decides to kick you

#@markdown This section also determine if you gonna resume training from latest checkpoint or not

#@markdown """""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""

#@markdown Automatically resume training from a checkpoint when you link model_save_directory to the folder that has "44k" folder in it

#@markdown [Example]

#@markdown training from scratch: /content/drive/MyDrive/so-vits_colab_files/44k

#@markdown resume training: /content/drive/MyDrive/so-vits_colab_files

#@markdown ---
use_custom_save_directory = True
model_save_directory = "path-to-custom-model-save-directory" #@param {type:"string"}

if use_custom_save_directory:
  search_string = "model_dir = os.path.join"
  target_line_number = None
  with open("/content/so-vits-svc/utils.py", "r") as f:
    lines = f.readlines()
  for i, line in enumerate(lines):
    if search_string in line:
      target_line_number = i
      break
  if target_line_number is None:
    print(f"Error: could not find target string '{search_string}' in utils.py")
  else:
    new_line = f'  model_dir = os.path.join("{model_save_directory}", args.model)\n'
    lines[target_line_number] = new_line
    with open("/content/so-vits-svc/utils.py", "w") as f:
      f.writelines(lines)
    print(f"Your model will be saved at {model_save_directory}")
else:
  print("Your model will be saved inside the logs folder under so-vits-svc root directory")

#@markdown Display tensorboard for the training progress visualization
tensorboard = True  #@param {type:"boolean"}
if tensorboard:
  %load_ext tensorboard
  %tensorboard --logdir {model_save_directory}
!python train.py -c configs/config.json -m 44k

# **Inference Section**
Upload your audio into the "raw" folder inside the so-vits-svc root directory.

In [None]:
#@title Start inference

#@markdown Parameters see [README.MD#inference](https://github.com/svc-develop-team/so-vits-svc#inference)

#@markdown

wav_filename = "input_wav.wav"  #@param {type:"string"}
model_path = "path-to-model"  #@param {type:"string"}
model_name = "your-model-name"  #@param {type:"string"}
config_path = "path-to-config"  #@param {type:"string"}
trans = "0"  #@param {type:"string"}
cluster_infer_ratio = "0"  #@param {type:"string"}

#@markdown

#@markdown Generally keep default:

slice_db = "-40"  #@param {type:"string"}
wav_format = "flac"  #@param {type:"string"}
wav_output = "/content/so-vits-svc/results/" + wav_filename + "_" + trans + "key" + "_" + model_name + "." + wav_format

!python inference_main.py -n {wav_filename} -m {model_path} -c {config_path} -s {model_name} -t {trans} -cr {cluster_infer_ratio} -sd {slice_db} -wf {wav_format}

#@markdown

#@markdown If you dont want to download from here, uncheck this.
download_after_inference = False  #@param {type:"boolean"}

if download_after_inference:
  from google.colab import files
  files.download(wav_output)