Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Voice-building-utils: Avoid spam during preprocessing data #867

Open
wants to merge 6 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 9 additions & 0 deletions src/scripts/general/requirements.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
matplotlib
argparse
cython
pysoundfile
numpy
scipy
webrtcvad
pyworld
tqdm
5 changes: 2 additions & 3 deletions src/scripts/general/setup_environment_debian
Original file line number Diff line number Diff line change
Expand Up @@ -147,15 +147,14 @@ hts_installation () {
# Cloning and building the RHVoice

build_rh_voice () {
echo "Installing some additional packages"
pip install matplotlib argparse cython pysoundfile
pip install numpy scipy webrtcvad pyworld
sudo apt install libparallel-forkmanager-perl jq -y
echo "Cloning RHVoice"
git clone --recurse https://github.com/RHVoice/RHVoice.git
echo "Building RHVoice for development"
cd RHVoice
scons dev=True
echo "Installing some additional packages"
pip install -r src/scripts/general/requirements.txt
cd ..
# Setting permissions to praat
echo "Setting permissions to praat"
Expand Down
62 changes: 48 additions & 14 deletions src/scripts/general/voice-building-utils
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
# -*- coding: utf-8; mode: Python; indent-tabs-mode: t -*-

# Copyright (C) 2012, 2013, 2017, 2021 Olga Yakovleva <olga@rhvoice.org>
# Copyright (C) 2024 Mateo Cedillo <angelitomateocedillo@gmail.com>

# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
Expand Down Expand Up @@ -35,7 +36,7 @@ import numpy
from scipy.signal import firwin,lfilter,kaiserord
from scipy.io import wavfile
import pathlib

from tqdm import tqdm
import pyworld

version_pattern=re.compile(r"(?m)^\s*\$ver\s*=\s*'(\d+)'\s*;\s*$")
Expand Down Expand Up @@ -226,7 +227,6 @@ class recordings_importer(task):
return files

def process(self,inpath,inname,outname):
print("Processing {}".format(inname))
cmd=[self.settings.get("praat_path","praat"),"--run",
os.path.join(scriptdir,"import.praat"),
inpath,
Expand All @@ -247,11 +247,15 @@ class recordings_importer(task):
files=self.get_file_list()
mapping=collections.OrderedDict()
times=collections.OrderedDict()
pbar = tqdm(total=len(files), desc="Processing")
for inpath,inname,outname in files:
inbase=os.path.splitext(inname)[0]
pbar.set_description("Processing {}".format(inname))
t1,t2=self.process(inpath,inname,outname)
mapping[outname]=inbase
times[inbase]=[t1,t2]
pbar.update(1)
pbar.close()
with open("file_name_mapping.json","w") as f:
json.dump(mapping,f,indent=0)
with open("trim.json","w") as f:
Expand Down Expand Up @@ -668,7 +672,6 @@ class htk_segmenter(task):
pvf=vf

def vad_file(self, name):
print("Vad for", name)
sr, samples=wavfile.read(os.path.join(self.wavdir, name+".wav"))
assert(sr==16000)
assert(samples.dtype==numpy.int16)
Expand All @@ -681,13 +684,15 @@ class htk_segmenter(task):
def do_vad(self):
	"""Write voice-activity segments for every recording to the VAD MLF file.

	Does nothing when ``self.init_sil_flag`` is false.  Otherwise the file
	at ``self.vad_mlf_path`` is (re)written with the ``#!MLF!#`` header
	followed by one entry per name in ``self.recordings``: a
	``"*/<name>.lab"`` line, one ``<start> <end> <label>`` line for each
	segment yielded by ``self.vad_file(name)``, and a terminating ``.``
	line.
	"""
	if not self.init_sil_flag:
		return
	# Single status line for the whole pass instead of one line per file.
	print("Applying Voice Activity Detection")
	with open(self.vad_mlf_path, "wt") as f:
		f.write("#!MLF!#\n")
		for file_name in self.recordings:
			f.write('"*/{}.lab"\n'.format(file_name))
			# vad_file yields (label, start, end); the file stores start and end first.
			for name, start, end in self.vad_file(file_name):
				f.write("{} {} {}\n".format(start, end, name))
			f.write(".\n")
	print("Done")

def __call__(self,args):
self.setup()
Expand Down Expand Up @@ -891,7 +896,6 @@ class f0_extracter(task):
def process(self,name):
raw_dir=os.path.join("data","raw")
base,ext=os.path.splitext(name)
print("Processing {}".format(base))
raw_path=os.path.join(raw_dir,name)
min_f0=self.settings["lower_f0"]
max_f0=self.settings["upper_f0"]
Expand All @@ -906,13 +910,17 @@ class f0_extracter(task):
self.shift=self.get_analysis_params()["FRAMESHIFT"]
raw_dir=os.path.join("data","raw")
f0_dir=os.path.join("data","lf0")
filelist = os.listdir(raw_dir)
if not os.path.isdir(f0_dir):
os.mkdir(f0_dir)
print("Extracting f0")
for name in sorted(os.listdir(raw_dir)):
pbar = tqdm(total=len(filelist), desc="Processing")
for name in sorted(filelist):
base,ext=os.path.splitext(name)
if os.path.exists(os.path.join(f0_dir,base+".lf0")):
pbar.update(1)
continue
pbar.set_description("Processing {}".format(name))
values=self.process(name)
nzvalues=[v for v in values if v!=0]
assert(len(nzvalues)>0)
Expand All @@ -926,6 +934,8 @@ class f0_extracter(task):
else:
lf0=math.log(f0)
f.write(struct.pack("=f",lf0))
pbar.update(1)
pbar.close()

class f0_range_computer(f0_extracter):
def register(self):
Expand All @@ -934,18 +944,22 @@ class f0_range_computer(f0_extracter):

def __call__(self,args):
raw_dir=os.path.join("data","raw")
filelist = os.listdir(raw_dir)
data=list()
if "guess_f0_method" in self.settings:
method=self.settings["guess_f0_method"]
else:
method=None
if (not isinstance(method,str)) or len(method)==0:
method="sptk_rapt"
for name in sorted(os.listdir(raw_dir)):
print("Processing {}".format(name))
pbar = tqdm(total=len(filelist), desc="Processing")
for name in sorted(filelist):
pbar.set_description("Processing {}".format(name))
values=self.extract_with(os.path.join(raw_dir,name),method,40,700)
values=[v for v in values if v!=0]
data.extend(numpy.log(values))
pbar.update(1)
pbar.close()
data=numpy.array(data)
m=numpy.mean(data)
d=3.0*numpy.std(data)
Expand Down Expand Up @@ -987,7 +1001,6 @@ class synthesizer(task):
return outpath

def process(self,name):
print("Processing {}".format(name))
params=self.get_analysis_params()
bindir=self.settings["bindir"]
excite=os.path.join(bindir,"excite")
Expand All @@ -1008,9 +1021,14 @@ class synthesizer(task):
if not os.path.isdir(synth_dir):
os.mkdir(synth_dir)
raw_dir=os.path.join("data","raw")
for name in sorted(os.listdir(raw_dir)):
filelist = os.listdir(raw_dir)
pbar = tqdm(total=len(filelist), desc="Processing")
for name in sorted(filelist):
base,ext=os.path.splitext(name)
pbar.set_description("Processing {}".format(base))
self.process(base)
pbar.update(1)
pbar.close()

class phonetic_feature_table(object):
def __init__(self):
Expand Down Expand Up @@ -1662,6 +1680,7 @@ class nccf(object):
class bap_extractor(task):
def register(self):
	"""Register the ``extract-bap`` subcommand with this task as its handler.

	The subcommand accepts ``--skip``, either bare or with an explicit
	boolean value (``--skip``, ``--skip True``, ``--skip False``).  It
	defaults to ``False``.
	"""
	def parse_skip(text):
		# type=bool would turn every non-empty string, including "False",
		# into True.  A ValueError raised here is reported by argparse as
		# a normal usage error.
		value=text.strip().lower()
		if value in ("1","true","t","yes","y","on"):
			return True
		if value in ("0","false","f","no","n","off",""):
			return False
		raise ValueError("expected a boolean value, got {!r}".format(text))
	subparser=subparsers.add_parser("extract-bap")
	# nargs="?" with const=True lets a bare "--skip" enable the option.
	subparser.add_argument("--skip",type=parse_skip,nargs="?",const=True,default=False,help="Skip already extracted data.")
	subparser.set_defaults(func=self)

def make_filters(self):
Expand Down Expand Up @@ -1706,7 +1725,6 @@ class bap_extractor(task):
return cc

def process(self,name):
print("Processing {}".format(name))
speech=self.load_speech(name)
f0=self.load_f0(name)
bands=self.split_into_bands(speech)
Expand All @@ -1724,18 +1742,26 @@ class bap_extractor(task):
self.bap_dir=os.path.join("data","bap")
if not os.path.isdir(self.bap_dir):
os.mkdir(self.bap_dir)
for name in sorted(os.listdir(os.path.join("data","raw"))):
filelist = os.listdir(os.path.join("data","raw"))
print("Extracting bap")
pbar = tqdm(total=len(filelist), desc="Processing")
for name in sorted(filelist):
base,ext=os.path.splitext(name)
if args.skip and os.path.exists(os.path.join(self.bap_dir,f"{base}.bap")):
pbar.update(1)
continue
pbar.set_description("Processing {}".format(base))
self.process(base)
pbar.update(1)
pbar.close()

class mgc_extractor(task):
def register(self):
	"""Register the ``extract-mgc`` subcommand with this task as its handler.

	The subcommand accepts ``--skip``, either bare or with an explicit
	boolean value (``--skip``, ``--skip True``, ``--skip False``).  It
	defaults to ``False``.
	"""
	def parse_skip(text):
		# type=bool would turn every non-empty string, including "False",
		# into True.  A ValueError raised here is reported by argparse as
		# a normal usage error.
		value=text.strip().lower()
		if value in ("1","true","t","yes","y","on"):
			return True
		if value in ("0","false","f","no","n","off",""):
			return False
		raise ValueError("expected a boolean value, got {!r}".format(text))
	subparser=subparsers.add_parser("extract-mgc")
	# nargs="?" with const=True lets a bare "--skip" enable the option.
	subparser.add_argument("--skip",type=parse_skip,nargs="?",const=True,default=False,help="Skip already extracted data.")
	subparser.set_defaults(func=self)

def process(self,name):

print("Processing {}".format(name))
sample_rate=self.settings["sample_rate"]
speech=self.load_speech(name)
target_db=self.settings.get("volume",-20)
Expand All @@ -1759,12 +1785,20 @@ class mgc_extractor(task):
self.aparams=self.get_analysis_params()
self.mgc_dir=os.path.join("data","mgc")
self.raw_dir=os.path.join("data", "raw")
self.filelist = os.listdir(self.raw_dir)
if not os.path.isdir(self.mgc_dir):
os.mkdir(self.mgc_dir)
print("Extracting MGC")
for name in sorted(os.listdir(self.raw_dir)):
pbar = tqdm(total=len(self.filelist), desc="Processing")
for name in sorted(self.filelist):
base,ext=os.path.splitext(name)
if args.skip and os.path.exists(os.path.join(self.mgc_dir,f"{base}.mgc")):
pbar.update(1)
continue
pbar.set_description("Processing {}".format(base))
self.process(base)
pbar.update(1)
pbar.close()

if __name__=="__main__":
for cls in [initializer,configurator,recordings_importer,labeller,f0_extracter,questions_maker,f0_range_computer,synthesizer,realigner,htk_segmenter,lpf_maker,voice_exporter,bap_extractor, mgc_extractor]:
Expand Down
Loading