Skip to content

Commit

Permalink
fix: fix default dataset path (#43)
Browse files Browse the repository at this point in the history
BREAKING CHANGE: the behaviour of preprocess_resample changes when a folder ./dataset_raw/44k exists, and "44k" is no longer allowed as a speaker name in some conditions
  • Loading branch information
34j committed Mar 20, 2023
1 parent 4068479 commit ac47fed
Show file tree
Hide file tree
Showing 27 changed files with 32 additions and 11 deletions.
2 changes: 1 addition & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -141,7 +141,7 @@ cython_debug/

# additional files
tests/**/*.wav
!tests/dataset_raw/44k/34j/**/*.wav
!tests/dataset_raw/34j/**/*.wav
tests/**/*.npy
tests/**/*.pt
tests/**/*.txt
Expand Down
8 changes: 4 additions & 4 deletions notebooks/so-vits-svc-fork-4.0.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -80,7 +80,7 @@
"metadata": {},
"outputs": [],
"source": [
"!mkdir -p \"dataset_raw/44k\""
"!mkdir -p \"dataset_raw\""
]
},
{
Expand All @@ -89,7 +89,7 @@
"metadata": {},
"outputs": [],
"source": [
"#!rm -r \"dataset_raw/44k\"\n",
"#!rm -r \"dataset_raw\"\n",
"#!rm -r \"dataset/44k\""
]
},
Expand All @@ -101,7 +101,7 @@
"source": [
"#@title Copy your dataset\n",
"DATASET_NAME = \"kiritan\" #@param {type: \"string\"}\n",
"!cp -R /content/drive/MyDrive/so-vits-svc-fork/dataset/{DATASET_NAME}/ -t \"dataset_raw/44k/\""
"!cp -R /content/drive/MyDrive/so-vits-svc-fork/dataset/{DATASET_NAME}/ -t \"dataset_raw/\""
]
},
{
Expand All @@ -114,7 +114,7 @@
"#@markdown Make sure you agree to the license when using this dataset.\n",
"# !wget https://tyc.rei-yumesaki.net/files/sozai-tyc-corpus1.zip\n",
"# !unzip sozai-tyc-corpus1.zip\n",
"# !mv \"/content/つくよみちゃんコーパス Vol.1 声優統計コーパス(JVSコーパス準拠)/おまけ:WAV(+12dB増幅&高音域削減)/WAV(+12dB増幅&高音域削減)\" \"dataset_raw/44k/tsukuyomi\""
"# !mv \"/content/つくよみちゃんコーパス Vol.1 声優統計コーパス(JVSコーパス準拠)/おまけ:WAV(+12dB増幅&高音域削減)/WAV(+12dB増幅&高音域削減)\" \"dataset_raw/tsukuyomi\""
]
},
{
Expand Down
2 changes: 1 addition & 1 deletion src/so_vits_svc_fork/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -371,7 +371,7 @@ def vc(
"-i",
"--input-dir",
type=click.Path(exists=True),
default=Path("./dataset_raw/44k"),
default=Path("./dataset_raw"),
help="path to source dir",
)
@click.option(
Expand Down
23 changes: 23 additions & 0 deletions src/so_vits_svc_fork/preprocess_resample.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
from __future__ import annotations

import warnings
from logging import getLogger
from pathlib import Path
from typing import Iterable
Expand Down Expand Up @@ -35,6 +36,17 @@ def _get_unique_filename(path: Path, existing_paths: Iterable[Path]) -> Path:
i += 1


def is_relative_to(path: Path, *other):
    """Tell whether ``path`` can be expressed relative to ``other``.

    This mirrors ``Path.is_relative_to()``, which only exists on
    Python 3.9+, so that Python 3.8 remains supported.

    Returns True when ``path.relative_to(*other)`` succeeds and False
    when it raises ``ValueError``.
    """
    try:
        path.relative_to(*other)
    except ValueError:
        # relative_to() signals "not a sub-path" by raising ValueError.
        return False
    else:
        return True


def preprocess_resample(
input_dir: Path | str, output_dir: Path | str, sampling_rate: int
) -> None:
Expand Down Expand Up @@ -71,6 +83,17 @@ def preprocess_one(input_path: Path, output_path: Path) -> None:
out_paths = []
for in_path in input_dir.rglob("*.*"):
in_path_relative = in_path.relative_to(input_dir)
if not in_path.is_absolute() and is_relative_to(
in_path, Path("dataset_raw") / "44k"
):
new_in_path_relative = in_path_relative.relative_to("44k")
warnings.warn(
f"Recommended folder structure has changed since v1.0.0. "
"Please move your dataset directly under dataset_raw folder. "
f"Recoginzed {in_path_relative} as {new_in_path_relative}"
)
in_path_relative = new_in_path_relative

if len(in_path_relative.parts) < 2:
continue
speaker_name = in_path_relative.parts[0]
Expand Down
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
Binary file removed tests/dataset_raw/44k/34j/11.wav
Binary file not shown.
Binary file removed tests/dataset_raw/44k/34j/12.wav
Binary file not shown.
Binary file removed tests/dataset_raw/44k/34j/13.wav
Binary file not shown.
Binary file removed tests/dataset_raw/44k/34j/14.wav
Binary file not shown.
Binary file removed tests/dataset_raw/44k/34j/15.wav
Binary file not shown.
Binary file removed tests/dataset_raw/44k/34j/16.wav
Binary file not shown.
Binary file removed tests/dataset_raw/44k/34j/17.wav
Binary file not shown.
Binary file removed tests/dataset_raw/44k/34j/18.wav
Binary file not shown.
Binary file removed tests/dataset_raw/44k/34j/19.wav
Binary file not shown.
Binary file removed tests/dataset_raw/44k/34j/20.wav
Binary file not shown.
Binary file removed tests/dataset_raw/44k/34j/nested/1.wav
Binary file not shown.
Binary file removed tests/dataset_raw/44k/34j/nested2/1.wav
Binary file not shown.
8 changes: 3 additions & 5 deletions tests/test_main.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,14 +22,13 @@ def test_infer(self):
raise SkipTest("Skip inference test on CI")
from so_vits_svc_fork.inference_main import infer # noqa

# infer("tests/dataset_raw/44k/34j/1.wav", "tests/configs/config.json", "tests/logs/44k")
# infer("tests/dataset_raw/34j/1.wav", "tests/configs/config.json", "tests/logs/44k")

def test_preprocess(self):
from so_vits_svc_fork.preprocess_resample import preprocess_resample

preprocess_resample("tests/dataset_raw/44k", "tests/dataset/44k", 44100)
preprocess_resample("tests/dataset_raw", "tests/dataset/44k", 44100)

def test_preprocess_config(self):
from so_vits_svc_fork.preprocess_flist_config import preprocess_config

preprocess_config(
Expand All @@ -40,9 +39,8 @@ def test_preprocess_config(self):
"tests/configs/config.json",
)

def test_preprocess_hubert(self):
if IS_CI:
raise SkipTest("Skip preprocessing test on CI")
raise SkipTest("Skip hubert and f0 test on CI")
from so_vits_svc_fork.preprocess_hubert_f0 import preprocess_hubert_f0

preprocess_hubert_f0("tests/dataset/44k", "tests/configs/44k/config.json")
Expand Down

0 comments on commit ac47fed

Please sign in to comment.