In [1]:
import os
import json
import glob

# Experiment directory: every path built in this notebook is joined onto it,
# and it is also handed to the preprocess/config/filelist helpers.
exp_dir = "logs/high_text_enc_exp1"

In [2]:
from preprocess import preprocess_training_set

In [7]:
from extract import run_pitch_extraction, run_embedding_extraction, generate_config, generate_filelist

# Settings for the extraction steps. They are passed as-is to the helpers
# imported from `extract`, which are called further down in this notebook.
f0_method = "rmvpe"  # passed to run_pitch_extraction
hop_length = 128  # passed to run_pitch_extraction
num_processes = 2  # passed to both extraction helpers
gpus = "0"  # "-" selects the CPU; otherwise "-"-separated CUDA indices (see `devices`)
sample_rate = "40000"  # kept as a string; passed to generate_config / generate_filelist
embedder_model = "contentvec"  # the value "custom" switches to embedder_model_custom
embedder_model_custom = None  # written to model_info.json only when embedder_model == "custom"
include_mutes = 0  # passed to generate_filelist

# Directory scanned for input wavs, plus the three output directories that the
# per-file paths built later point into.
wav_path = os.path.join(exp_dir, "sliced_audios_16k")
for subdir in ("f0", "f0_voiced", "extracted"):
    os.makedirs(os.path.join(exp_dir, subdir), exist_ok=True)

# Resolve which embedder name to record: the custom one only when "custom" is selected.
chosen_embedder_model = embedder_model
if embedder_model == "custom":
    chosen_embedder_model = embedder_model_custom

# Merge the embedder name into model_info.json, keeping any keys already stored there.
file_path = os.path.join(exp_dir, "model_info.json")
data = {}
if os.path.exists(file_path):
    with open(file_path, "r") as f:
        data = json.load(f)
data["embedder_model"] = chosen_embedder_model
with open(file_path, "w") as f:
    json.dump(data, f, indent=4)

# Turn the `gpus` string into a device list: "-" means CPU only, otherwise one
# "cuda:<idx>" entry per "-"-separated index.
if gpus == "-":
    devices = ["cpu"]
else:
    devices = [f"cuda:{idx}" for idx in gpus.split("-")]

In [None]:
# Run preprocessing on the raw training audio for this experiment.
# NOTE(review): input_root is an empty placeholder here — set it to the folder
# holding the source audio before running this cell.
preprocess_training_set(
    input_root=r"",
    # Derived from `sample_rate` so preprocessing cannot drift from the value
    # later passed to generate_config / generate_filelist.
    sr=int(sample_rate),
    num_processes=32,
    exp_dir=exp_dir,
    cut_preprocess="Automatic",
    process_effects=True,
    noise_reduction=False,
    reduction_strength=0.7,
    chunk_len=3.0,
    overlap_len=0.3,
)

In [8]:
# Build one work item per wav found in `wav_path`:
#   [input wav, path under f0/, path under f0_voiced/, path under extracted/]
files = []
for file in glob.glob(os.path.join(wav_path, "*.wav")):
    file_name = os.path.basename(file)
    # Swap only the extension. str.replace("wav", "npy") would also rewrite any
    # "wav" inside the stem (e.g. "wave_1.wav" -> "npye_1.npy").
    stem = os.path.splitext(file_name)[0]
    file_info = [
        file,
        os.path.join(exp_dir, "f0", file_name + ".npy"),
        os.path.join(exp_dir, "f0_voiced", file_name + ".npy"),
        os.path.join(exp_dir, "extracted", stem + ".npy"),
    ]
    files.append(file_info)

In [None]:
# Pitch extraction over every entry in `files`, using the devices and settings
# configured earlier in the notebook.
run_pitch_extraction(files, devices, f0_method, hop_length, num_processes)

In [None]:
# Embedding extraction over the same `files` list. Both the embedder name and
# the custom-embedder value are forwarded; the helper decides which one to use.
run_embedding_extraction(
    files, devices, embedder_model, embedder_model_custom, num_processes
)

In [12]:
# Generate the config for this experiment; note `sample_rate` is passed as a string.
generate_config(sample_rate, exp_dir)

In [13]:
# Generate the file list for this experiment from the outputs produced above.
generate_filelist(exp_dir, sample_rate, include_mutes)