# **Install Diff-SVC**

In [None]:
#@title #Check GPU type
#@markdown Optional: shows which GPU this session was given, which only helps you guess how fast rendering will be.
# `-L` prints a one-line list of the GPUs; the plain call prints the full status table.
!nvidia-smi -L
!nvidia-smi

In [None]:
#@title #Install Diff-SVC
#@markdown The stuff you'll need for every other thing afterwards.
#@markdown When you use Diff_SVC_Inference_By_HKP, HKP Will give you the HaKhanhPhuongVIVSI Voice Model in checkpoints folder


from IPython.display import clear_output 
from google.colab import files 
import os
print('Upgrading pip & installing 7zip')
!rm -rf /content/sample_data
!python -m pip install --upgrade pip
!python -m pip install --upgrade wheel
!apt-get install unzip
!pip install gdown

print('Installing torch')
%pip install torch==1.12.1+cu113 torchvision==0.13.1+cu113 torchaudio==0.12.1+cu113 -f https://download.pytorch.org/whl/cu113/torch_stable.html
!pip install --pre torchtext==0.6.0 --no-deps

#@markdown ---

#@markdown ###Select which repo to use
#@markdown The official repo is up-to-date, while UtaUtaUtau's version has Harvest support for improved f0.

print('Installing Diff-SVC')
repo = 'https://github.com/HaKhanhPhuongVIVSI/diff-svc'#@param ["https://github.com/prophesier/diff-svc", "https://github.com/utautautau/diff-svc", "https://github.com/HaKhanhPhuongVIVSI/diff-svc"]
!git clone $repo &> /dev/null
%cd 

print('Installing requirements')
%cd "/content/diff-svc/"
!pip install -r requirements_short.txt
!pip install tensorboard<2.9,>=2.8
%reload_ext tensorboard

%cd "/content/diff-svc/training/"
!rm config.yaml
!gdown 'https://github.com/HaKhanhPhuongVIVSI/Diff-SVC-HKP/releases/download/DiffSVC-HKP/config.yaml' -O config.yaml
%cd "/content/"
!gdown 'https://github.com/HaKhanhPhuongVIVSI/Diff-SVC-HKP/releases/download/DiffSVC-HKP/checkpoints.zip' -O checkpoints.zip
%mkdir -p /content/diff-svc/checkpoints/
!unzip /content/checkpoints.zip -d /content/diff-svc/

print('Done!')

# **Mount Google Drive**

In [None]:
#@title Mount Google Drive
# Mounts Google Drive inside the Colab VM at `mount_path`.
from google.colab import drive

# Mount point, editable through the form field.
mount_path = '/content/drive/' #@param {type:"string"}
drive.mount(mount_path)

# **Inference**

In [None]:
#@title Load Model

#@markdown Load in the full path of your model and config.  

#@markdown `project_name` is the name of your singer, `model_path`, as the name states, is the path directory to your model (full path), same goes for `config_path`.

#@markdown Example:

#@markdown project_name = test

#@markdown model_path = `/content/drive/MyDrive/Diff-SVC/checkpoints/test/model_ckpt_steps_50000.ckpt`

#@markdown config_path = `/content/drive/MyDrive/Diff-SVC/checkpoints/test/config.yaml`

#@markdown ---
#@markdown ###**Set model location with the name of the speaker:**
#@markdown The model below is a default model, change the settings to use your own model.
#@markdown If you wish to use the pre-trained model and don't have your own model, leave these at their default values.

#@markdown ---
%cd "/content/diff-svc/"

os.environ['PYTHONPATH']='.'

!CUDA_VISIBLE_DEVICES=0
from utils.hparams import hparams
from preprocessing.data_gen_utils import get_pitch_parselmouth,get_pitch_crepe
import numpy as np
import matplotlib.pyplot as plt
import IPython.display as ipd
import utils
import librosa
import torchcrepe
from infer import *
import logging
from infer_tools.infer_tool import *

logging.getLogger('numba').setLevel(logging.WARNING)

print('Importted Modules')

project_name = 'HaKhanhPhuong'#@param {type:"string"}
model_path = '/content/diff-svc/checkpoints/model_ckpt_steps_25000.ckpt'#@param {type:"string"}
config_path = '/content/diff-svc/checkpoints/config.yaml'#@param {type:"string"}
#@markdown ---

#@markdown Disable it if you using CPU version of Diff-SVC Repo
hubert_gpu=True#@param {type:"boolean"}
svc_model = Svc(project_name,config_path,hubert_gpu, model_path)
print('model loaded')



In [None]:
#@title Upload Audio
%cd "/content/diff-svc/raw/"

print("\n\033[34m\033[1mupload your audio")
listfn, length = files.upload().popitem()

%cd "/content/diff-svc/"
print("\n\033[32m\033[1mdone")

In [None]:
#@title Input Audio and Adjust Parameters
# Collects the inference settings from the form and renders `wav_fn` with the loaded model.


wav_fn='/content/diff-svc/raw/test_input.wav'#@param {type:"string"} 
# Loaded separately so the results cell can play the input next to the output.
demoaudio, sr = librosa.load(wav_fn)
#@markdown ---
#@markdown Change this if the input voice and the model's voice differ in range (e.g. male input, female model)
key = 0 #@param {type: "number"}

#@markdown ---

pndm_speedup = 20 #@param {type:"number"} 
wav_gen='test_output.wav'#@param {type:"string"} 

#@markdown ---

add_noise_step = 500#@param {type:"number"} 
thre=0.05 #@param {type:"number"} 

#@markdown ---

#@markdown use_crepe: gives good results but is slow, and the pronunciation sometimes comes out wrong
use_crepe=True #@param {type:"boolean"}
#@markdown ---

#@markdown Recommended: keep this ticked every time
use_pe = True #@param {type:"boolean"}
#@markdown ---

#@markdown You get more realistic results with this, because it mixes your voice model and the original singer’s voice.
use_gt_mel = False #@param {type:"boolean"}

f0_tst, f0_pred, audio = run_clip(svc_model,
                                  file_path=wav_fn, 
                                  key=key, 
                                  acc=pndm_speedup, 
                                  use_crepe=use_crepe, 
                                  use_pe=use_pe, 
                                  # Pass the form value; a hard-coded 0.05 here
                                  # made the `thre` field above have no effect.
                                  thre=thre,
                                  use_gt_mel=use_gt_mel, 
                                  add_noise_step=add_noise_step,
                                  project_name=project_name,
                                  out_path=wav_gen)

# **Display Results**

In [None]:
#@title Display Results
# First player: the input audio as loaded by librosa.
input_player = ipd.Audio(demoaudio, rate=sr)
ipd.display(input_player)

# Second player: the rendered audio at the model's sample rate, played without normalisation.
output_player = ipd.Audio(audio, rate=hparams['audio_sample_rate'], normalize=False)
ipd.display(output_player)

# **Display Graphics**

In [None]:
#@title Display Graphics
%matplotlib inline
f0_gen,_=get_pitch_parselmouth(*svc_model.vocoder.wav2spec(wav_gen),hparams)
f0_tst[f0_tst==0]=np.nan#ground truth f0
f0_pred[f0_pred==0]=np.nan#f0 pe predicted
f0_gen[f0_gen==0]=np.nan#f0 generated
fig=plt.figure(figsize=[15,5])
plt.plot(np.arange(0,len(f0_tst)),f0_tst,color='black')
plt.plot(np.arange(0,len(f0_pred)),f0_pred,color='orange')
plt.plot(np.arange(0,len(f0_gen)),f0_gen,color='red')
plt.axhline(librosa.note_to_hz('C4'),ls=":",c="blue")
plt.axhline(librosa.note_to_hz('G4'),ls=":",c="green")
plt.axhline(librosa.note_to_hz('C5'),ls=":",c="orange")
plt.axhline(librosa.note_to_hz('F#5'),ls=":",c="red")
#plt.axhline(librosa.note_to_hz('A#5'),ls=":",c="black")
plt.show()