Skip to content

Commit

Permalink
fix: change default f0 method from crepe to dio (#100)
Browse files Browse the repository at this point in the history
BREAKING CHANGE: all default values for f0 method changed
  • Loading branch information
34j committed Mar 25, 2023
1 parent 66119ec commit baf58d2
Show file tree
Hide file tree
Showing 9 changed files with 24 additions and 28 deletions.
3 changes: 2 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -119,13 +119,14 @@ Place your dataset like `dataset_raw/{speaker_id}/**/{wav_file}.{any_format}` (s
```shell
svc pre-resample
svc pre-config
svc pre-hubert
svc pre-hubert -fm dio
svc train
```

#### Notes

- Dataset audio duration per file should be under roughly 10 seconds; otherwise VRAM will run out.
- To change the f0 inference method to CREPE, replace `svc pre-hubert -fm dio` with `svc pre-hubert -fm crepe`. You may need to reduce `--n-jobs` due to performance issues.
- It is recommended to change the batch_size in `config.json` before the `train` command to match the VRAM capacity. As tested, the default requires about 14 GB.

### Further help
Expand Down
3 changes: 2 additions & 1 deletion notebooks/so-vits-svc-fork-4.0.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -155,7 +155,8 @@
"metadata": {},
"outputs": [],
"source": [
"!svc pre-hubert"
"F0_METHOD = \"dio\" #@param [\"crepe\", \"crepe-tiny\", \"parselmouth\", \"dio\", \"harvest\"]\n",
"!svc pre-hubert -fm {F0_METHOD}"
]
},
{
Expand Down
5 changes: 3 additions & 2 deletions src/so_vits_svc_fork/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -281,7 +281,7 @@ def infer(
"-fm",
"--f0-method",
type=click.Choice(["crepe", "crepe-tiny", "parselmouth", "dio", "harvest"]),
default="crepe",
default="dio",
help="f0 prediction method",
)
@click.option("-p", "--pad-seconds", type=float, default=0.02, help="pad seconds")
Expand Down Expand Up @@ -490,6 +490,7 @@ def pre_config(
@click.option(
"-n",
"--n_jobs",
"--n-jobs",
type=int,
default=4,
help="number of jobs (optimal value may depend on your VRAM capacity and audio duration per file)",
Expand All @@ -505,7 +506,7 @@ def pre_config(
"-fm",
"--f0-method",
type=click.Choice(["crepe", "crepe-tiny", "parselmouth", "dio", "harvest"]),
default="crepe",
default="dio",
)
def pre_hubert(
input_dir: Path,
Expand Down
2 changes: 1 addition & 1 deletion src/so_vits_svc_fork/default_gui_presets.json
Original file line number Diff line number Diff line change
Expand Up @@ -75,7 +75,7 @@
"silence_threshold": -35.0,
"transpose": 0.0,
"auto_predict_f0": true,
"f0_method": "crepe",
"f0_method": "dio",
"cluster_infer_ratio": 0.0,
"noise_scale": 0.4,
"pad_seconds": 0.1,
Expand Down
3 changes: 2 additions & 1 deletion src/so_vits_svc_fork/gui.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,8 @@ def load_presets() -> dict:
json.loads(GUI_PRESETS_PATH.read_text()) if GUI_PRESETS_PATH.exists() else {}
)
# priority: defaults > users
return {**defaults, **users}
# order: defaults -> users
return {**defaults, **users, **defaults}


def add_preset(name: str, preset: dict) -> dict:
Expand Down
10 changes: 5 additions & 5 deletions src/so_vits_svc_fork/inference/infer_tool.py
Original file line number Diff line number Diff line change
Expand Up @@ -130,7 +130,7 @@ def get_unit_f0(
speaker: int | str,
f0_method: Literal[
"crepe", "crepe-tiny", "parselmouth", "dio", "harvest"
] = "crepe",
] = "dio",
):
f0 = utils.compute_f0(
audio,
Expand Down Expand Up @@ -172,7 +172,7 @@ def infer(
noise_scale: float = 0.4,
f0_method: Literal[
"crepe", "crepe-tiny", "parselmouth", "dio", "harvest"
] = "crepe",
] = "dio",
) -> tuple[torch.Tensor, int]:
audio = audio.astype(np.float32)
# get speaker id
Expand Down Expand Up @@ -240,7 +240,7 @@ def infer_silence(
noise_scale: float = 0.4,
f0_method: Literal[
"crepe", "crepe-tiny", "parselmouth", "dio", "harvest"
] = "crepe",
] = "dio",
# slice config
db_thresh: int = -40,
pad_seconds: float = 0.5,
Expand Down Expand Up @@ -459,7 +459,7 @@ def infer(
noise_scale: float = 0.4,
f0_method: Literal[
"crepe", "crepe-tiny", "parselmouth", "dio", "harvest"
] = "crepe",
] = "dio",
# slice config
db_thresh: int = -40,
pad_seconds: float = 0.5,
Expand Down Expand Up @@ -519,7 +519,7 @@ def process(
noise_scale: float = 0.4,
f0_method: Literal[
"crepe", "crepe-tiny", "parselmouth", "dio", "harvest"
] = "crepe",
] = "dio",
# slice config
db_thresh: int = -40,
chunk_seconds: float = 0.5,
Expand Down
8 changes: 2 additions & 6 deletions src/so_vits_svc_fork/inference_main.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,9 +29,7 @@ def infer(
auto_predict_f0: bool = False,
cluster_infer_ratio: float = 0,
noise_scale: float = 0.4,
f0_method: Literal[
"crepe", "crepe-tiny", "parselmouth", "dio", "harvest"
] = "crepe",
f0_method: Literal["crepe", "crepe-tiny", "parselmouth", "dio", "harvest"] = "dio",
# slice config
db_thresh: int = -40,
pad_seconds: float = 0.5,
Expand Down Expand Up @@ -83,9 +81,7 @@ def realtime(
auto_predict_f0: bool = False,
cluster_infer_ratio: float = 0,
noise_scale: float = 0.4,
f0_method: Literal[
"crepe", "crepe-tiny", "parselmouth", "dio", "harvest"
] = "crepe",
f0_method: Literal["crepe", "crepe-tiny", "parselmouth", "dio", "harvest"] = "dio",
# slice config
db_thresh: int = -40,
pad_seconds: float = 0.5,
Expand Down
12 changes: 3 additions & 9 deletions src/so_vits_svc_fork/preprocess_hubert_f0.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,9 +23,7 @@ def _process_one(
sampling_rate: int,
hop_length: int,
device: Literal["cuda", "cpu"] = "cuda",
f0_method: Literal[
"crepe", "crepe-tiny", "parselmouth", "dio", "harvest"
] = "crepe",
f0_method: Literal["crepe", "crepe-tiny", "parselmouth", "dio", "harvest"] = "dio",
force_rebuild: bool = False,
):
wav, sr = librosa.load(filepath, sr=sampling_rate)
Expand Down Expand Up @@ -59,9 +57,7 @@ def _process_batch(
sampling_rate: int,
hop_length: int,
pbar_position: int,
f0_method: Literal[
"crepe", "crepe-tiny", "parselmouth", "dio", "harvest"
] = "crepe",
f0_method: Literal["crepe", "crepe-tiny", "parselmouth", "dio", "harvest"] = "dio",
force_rebuild: bool = False,
):
device = "cuda" if torch.cuda.is_available() else "cpu"
Expand All @@ -83,9 +79,7 @@ def preprocess_hubert_f0(
input_dir: Path | str,
config_path: Path | str,
n_jobs: int = 4,
f0_method: Literal[
"crepe", "crepe-tiny", "parselmouth", "dio", "harvest"
] = "crepe",
f0_method: Literal["crepe", "crepe-tiny", "parselmouth", "dio", "harvest"] = "dio",
force_rebuild: bool = False,
):
input_dir = Path(input_dir)
Expand Down
6 changes: 4 additions & 2 deletions src/so_vits_svc_fork/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -243,7 +243,7 @@ def compute_f0(
p_len: None | int = None,
sampling_rate: int = 44100,
hop_length: int = 512,
method: Literal["crepe", "crepe-tiny", "parselmouth", "dio", "harvest"] = "crepe",
method: Literal["crepe", "crepe-tiny", "parselmouth", "dio", "harvest"] = "dio",
**kwargs,
):
with timer() as t:
Expand All @@ -260,7 +260,9 @@ def compute_f0(
elif method == "parselmouth":
f0 = compute_f0_parselmouth(wav_numpy, p_len, sampling_rate, hop_length)
else:
raise ValueError("type must be dio, crepe, harvest or parselmouth")
raise ValueError(
"type must be dio, crepe, crepe-tiny, harvest or parselmouth"
)
rtf = t.elapsed / (len(wav_numpy) / sampling_rate)
LOG.info(f"F0 inference time: {t.elapsed:.3f}s, RTF: {rtf:.3f}")
return f0
Expand Down

0 comments on commit baf58d2

Please sign in to comment.