Skip to content

Commit

Permalink
fix(preprocess): fix dtype in sf.read() to save memory and fix preprocess_resample (#132)
Browse files Browse the repository at this point in the history
  • Loading branch information
34j committed Mar 26, 2023
1 parent 4203f37 commit 0af1e13
Show file tree
Hide file tree
Showing 4 changed files with 46 additions and 31 deletions.
16 changes: 14 additions & 2 deletions src/so_vits_svc_fork/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -434,14 +434,26 @@ def vc(
help="path to output dir",
)
@click.option("-s", "--sampling-rate", type=int, default=44100, help="sampling rate")
def pre_resample(input_dir: Path, output_dir: Path, sampling_rate: int) -> None:
@click.option(
"-n",
"--n-jobs",
type=int,
default=-1,
help="number of jobs (optimal value may depend on your RAM capacity and audio duration per file)",
)
def pre_resample(
input_dir: Path, output_dir: Path, sampling_rate: int, n_jobs: int
) -> None:
"""Preprocessing part 1: resample"""
from .preprocess_resample import preprocess_resample

input_dir = Path(input_dir)
output_dir = Path(output_dir)
preprocess_resample(
input_dir=input_dir, output_dir=output_dir, sampling_rate=sampling_rate
input_dir=input_dir,
output_dir=output_dir,
sampling_rate=sampling_rate,
n_jobs=n_jobs,
)


Expand Down
57 changes: 30 additions & 27 deletions src/so_vits_svc_fork/preprocess_resample.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,6 @@
from pathlib import Path
from typing import Iterable

import audioread.exceptions
import librosa
import numpy as np
import soundfile
Expand Down Expand Up @@ -48,37 +47,38 @@ def is_relative_to(path: Path, *other):
return False


def preprocess_resample(
input_dir: Path | str, output_dir: Path | str, sampling_rate: int
) -> None:
input_dir = Path(input_dir)
output_dir = Path(output_dir)
"""Preprocess audio files in input_dir and save them to output_dir."""
def _preprocess_one(input_path: Path, output_path: Path, sampling_rate: int) -> None:
"""Preprocess one audio file."""

try:
audio, sr = sf.read(input_path, dtype="float32")

def preprocess_one(input_path: Path, output_path: Path) -> None:
"""Preprocess one audio file."""
# Audioread is the last backend it will attempt, so this is the exception thrown on failure
except Exception as e:
# Failure due to attempting to load a file that is not audio, so return early
LOG.warning(f"Failed to load {input_path} due to {e}")
return

try:
audio, sr = sf.read(input_path)
# Trim silence
audio, _ = librosa.effects.trim(audio, top_db=20)

# Audioread is the last backend it will attempt, so this is the exception thrown on failure
except audioread.exceptions.NoBackendError as e:
# Failure due to attempting to load a file that is not audio, so return early
LOG.warning(f"Failed to load {input_path} due to {e}")
return
# Adjust volume
peak = np.abs(audio).max()
if peak > 1.0:
audio = 0.98 * audio / peak

# Trim silence
audio, _ = librosa.effects.trim(audio, top_db=20)
# Resample
audio = librosa.resample(audio, orig_sr=sr, target_sr=sampling_rate)
audio /= max(audio.max(), -audio.min())
soundfile.write(output_path, audio, samplerate=sampling_rate, subtype="PCM_16")

# Adjust volume
peak = np.abs(audio).max()
if peak > 1.0:
audio = 0.98 * audio / peak

# Resample
audio = librosa.resample(audio, orig_sr=sr, target_sr=sampling_rate)
audio /= max(audio.max(), -audio.min())
soundfile.write(output_path, audio, samplerate=sampling_rate, subtype="PCM_16")
def preprocess_resample(
input_dir: Path | str, output_dir: Path | str, sampling_rate: int, n_jobs: int = -1
) -> None:
input_dir = Path(input_dir)
output_dir = Path(output_dir)
"""Preprocess audio files in input_dir and save them to output_dir."""

in_paths = []
out_paths = []
Expand Down Expand Up @@ -108,4 +108,7 @@ def preprocess_one(input_path: Path, output_path: Path) -> None:
in_and_out_paths = list(zip(in_paths, out_paths))

with tqdm_joblib(desc="Preprocessing", total=len(in_and_out_paths)):
Parallel(n_jobs=-1)(delayed(preprocess_one)(*args) for args in in_and_out_paths)
Parallel(n_jobs=n_jobs)(
delayed(_preprocess_one)(*args, sampling_rate=sampling_rate)
for args in in_and_out_paths
)
2 changes: 1 addition & 1 deletion src/so_vits_svc_fork/preprocess_speaker_diarization.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ def _process_one(
huggingface_token: str | None = None,
) -> None:
try:
audio, sr = sf.read(input_path)
audio, sr = sf.read(input_path, dtype="float32")
except Exception as e:
LOG.warning(f"Failed to read {input_path}: {e}")
return
Expand Down
2 changes: 1 addition & 1 deletion src/so_vits_svc_fork/preprocess_split.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ def _process_one(
hop_seconds: float = 0.1,
):
try:
audio, sr = sf.read(input_path)
audio, sr = sf.read(input_path, dtype="float32")
except Exception as e:
LOG.warning(f"Failed to read {input_path}: {e}")
return
Expand Down

0 comments on commit 0af1e13

Please sign in to comment.