Skip to content

Commit

Permalink
Merge pull request #131 from dabraude/master
Browse files Browse the repository at this point in the history
adds script for custom voices
  • Loading branch information
dabraude committed Dec 18, 2020
2 parents 6f3015c + c9b2ec1 commit cd0581f
Show file tree
Hide file tree
Showing 68 changed files with 5,842 additions and 1 deletion.
5 changes: 4 additions & 1 deletion idlak-data/en/ga/lexicon-cmu0.7a.xml
Original file line number Diff line number Diff line change
Expand Up @@ -479,6 +479,7 @@
<lex pron="ae0 k s eh1 n ch ah0 w ey2 t ih0 d" entry="full" default="true">accentuated</lex>
<lex pron="ae0 k s eh1 n ch uw0 ey0 t s" entry="full" default="true">accentuates</lex>
<lex pron="ae0 k s eh1 n ch ah0 w ey2 t ih0 ng" entry="full" default="true">accentuating</lex>
<lex pron="ae1 k s eh0 n ch er0" entry="full" default="true">accenture</lex>
<lex pron="ae0 k s eh1 p t" entry="full" default="true">accept</lex>
<lex pron="ah0 k s eh1 p t" entry="1" default="false">accept</lex>
<lex pron="ah0 k s eh2 p t ah0 b ih1 l ah0 t iy0" entry="full" default="true">acceptability</lex>
Expand Down Expand Up @@ -11428,7 +11429,8 @@
<lex pron="b ay0 m ah1 n th l iy0" entry="full" default="true">bimonthly</lex>
<lex pron="b ih1 n" entry="full" default="true">bin</lex>
<lex pron="b iy1 n ah0" entry="full" default="true">bina</lex>
<lex pron="b ay1 n er0 iy0" entry="full" default="true">binary</lex>
<lex pron="b ay1 n ax0 r iy0" entry="full" default="true">binary</lex>
<lex pron="b ay1 n er0 iy0" entry="full" default="false">binary</lex>
<lex pron="b ay0 n ae1 sh ah0 n ah0 l" entry="full" default="true">binational</lex>
<lex pron="b ih1 n ch iy0" entry="full" default="true">binchy</lex>
<lex pron="b ay1 n d" entry="full" default="true">bind</lex>
Expand Down Expand Up @@ -19537,6 +19539,7 @@
<lex pron="s eh1 r ah0 m ow2 n iy0 z" entry="full" default="true">ceremonies</lex>
<lex pron="s eh1 r ah0 m ow2 n iy0" entry="full" default="true">ceremony</lex>
<lex pron="s er0 ey1 n ow0" entry="full" default="true">cereno</lex>
<lex pron="s eh1 r ax0 p r ao2 k" entry="full" default="true">cereproc</lex>
<lex pron="s ih1 r iy0 z" entry="full" default="true">ceres</lex>
<lex pron="s eh2 r ey1 z ow0" entry="full" default="true">cerezo</lex>
<lex pron="s er0 ey1 z ow0" entry="1" default="false">cerezo</lex>
Expand Down
23 changes: 23 additions & 0 deletions idlak-egs/tts_tangle_custom/s2/cmd.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
# you can change cmd.sh depending on what type of queue you are using.
# If you have no queueing system and want to run on a local machine, you
# can change all instances of 'queue.pl' to 'run.pl' (but be careful and run
# commands one by one: most recipes will exhaust the memory on your
# machine). queue.pl works with GridEngine (qsub). slurm.pl works
# with slurm. Different queues are configured differently, with different
# queue names and different ways of specifying things like memory;
# to account for these differences you can create and edit the file
# conf/queue.conf to match your queue's configuration. Search for
# conf/queue.conf in http://kaldi-asr.org/doc/queue.html for more information,
# or search for the string 'default_config' in utils/queue.pl or utils/slurm.pl.

# Default job launchers: run everything on the local machine.
export train_cmd=run.pl
export decode_cmd="run.pl --mem 2G"
# The use of cuda_cmd is deprecated; it is used only in 'nnet1'.
export cuda_cmd="run.pl --gpu 1"

# On hosts in the fit.vutbr.cz domain, submit jobs through queue.pl instead.
# The pattern on the right-hand side must stay unquoted: inside [[ ]], a quoted
# right-hand side is compared as a literal string, so "*.fit.vutbr.cz" would
# never match a real hostname and this branch would be dead code.
if [[ "$(hostname -f)" == *.fit.vutbr.cz ]]; then
  queue_conf=$HOME/queue_conf/default.conf # see example /homes/kazi/iveselyk/queue_conf/default.conf,
  export train_cmd="queue.pl --config $queue_conf --mem 2G --matylda 0.2"
  export decode_cmd="queue.pl --config $queue_conf --mem 3G --matylda 0.1"
  export cuda_cmd="queue.pl --config $queue_conf --gpu 1 --mem 10G --tmp 40G"
fi
5 changes: 5 additions & 0 deletions idlak-egs/tts_tangle_custom/s2/conf/2-layer-nn.conf
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
hid_layers=2
# Spelled with an underscore like the other configs here: 'hid-dim' is not a
# valid shell variable name, so 'hid-dim=1000' would not set anything.
hid_dim=1000
splice_lr=0
apply_cmvn=true
apply_glob_cmvn=true
4 changes: 4 additions & 0 deletions idlak-egs/tts_tangle_custom/s2/conf/3-layer-nn-2splice10.conf
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
hid_layers=3
insplice_lr=10
splice_lr=10
apply_glob_cmvn=true
4 changes: 4 additions & 0 deletions idlak-egs/tts_tangle_custom/s2/conf/3-layer-nn-splice10.conf
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
hid_layers=3
insplice_lr=10
splice_lr=0
apply_glob_cmvn=true
5 changes: 5 additions & 0 deletions idlak-egs/tts_tangle_custom/s2/conf/3-layer-nn-splice5.conf
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
hid_layers=3
insplice_lr=5
insplice_step=1
splice_lr=0
apply_glob_cmvn=true
3 changes: 3 additions & 0 deletions idlak-egs/tts_tangle_custom/s2/conf/3-layer-nn.conf
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
hid_layers=3
splice_lr=0
apply_glob_cmvn=true
4 changes: 4 additions & 0 deletions idlak-egs/tts_tangle_custom/s2/conf/4-layer-nn.conf
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
hid_layers=4
splice_lr=0
apply_cmvn=true
apply_glob_cmvn=true
5 changes: 5 additions & 0 deletions idlak-egs/tts_tangle_custom/s2/conf/auto-nn.conf
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
hid_layers=2
splice_lr=0
apply_glob_cmvn=true
apply_cmvn=false
apply_minmax=false
8 changes: 8 additions & 0 deletions idlak-egs/tts_tangle_custom/s2/conf/bndap-48k.conf
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
--snip-edges=false
--frame-shift=5
--sample-frequency=48000
--num-mel-bins=25
--scale-type=bark
--use-hts-bands=true
--peak-quality=0.4
--peak-width=0.1
9 changes: 9 additions & 0 deletions idlak-egs/tts_tangle_custom/s2/conf/bndap.conf
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
--snip-edges=false
--frame-shift=5
--sample-frequency=16000
--num-mel-bins=21
--use-hts-bands=true
--peak-quality=0.4
--peak-width=0.1


3 changes: 3 additions & 0 deletions idlak-egs/tts_tangle_custom/s2/conf/decode_dnn.config
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
#beam=18.0 # beam for decoding. Was 13.0 in the scripts.
#latbeam=10.0 # this has most effect on size of the lattices.
parallel_opts=
13 changes: 13 additions & 0 deletions idlak-egs/tts_tangle_custom/s2/conf/dur-lstm-splice5.conf
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
hid_layers=3
insplice=
splice=0
delta_opts="--delta-order=2"
minmax_opts=true
learn_rate=0.001
hid_dim=100
network_type=lstm
momentum=0.8
proto_opts="--no-softmax --cell-dim=100 --proj-dim=50 --num-layers=2 --activation-final --activation-type=<Sigmoid>"
train_tool=nnet-train-multistream-perutt-tgtmat
train_tool_opts="--objective-function=mse --num-streams=10"
scheduler_opts="--min-iters 15"
22 changes: 22 additions & 0 deletions idlak-egs/tts_tangle_custom/s2/conf/dur-nn-splice5.conf
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
hid_layers=3
insplice=5
splice=0
delta_opts=
#"--delta-order=2"
#minmax_opts=

#proto_opts="--no-softmax --activation-type=<Tanh>"
#learn_rate=0.0001

# --activation-final=<Sigmoid>
#proto_opts="--no-softmax --activation-type=<Sigmoid>"
#learn_rate=0.00001
cmvn_opts="--norm-means=false --norm-vars=true"
minmax_opts=
global_cmvn=
proto_opts="--no-softmax --activation-type=<Sigmoid>"
learn_rate=0.000005
momentum=0.2
hid_dim=100
halving_factor=0.75
max_iters=50
4 changes: 4 additions & 0 deletions idlak-egs/tts_tangle_custom/s2/conf/fbank.conf
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
# Options that override the feature-extraction defaults.
--frame-shift=5
--num-mel-bins=50
--use-energy=true
13 changes: 13 additions & 0 deletions idlak-egs/tts_tangle_custom/s2/conf/full-lstm-splice5.conf
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
hid_layers=3
insplice=
splice=0
delta_opts="--delta-order=2"
minmax_opts=true
learn_rate=0.0001
hid_dim=100
network_type=lstm
momentum=0.8
proto_opts="--no-softmax --cell-dim=1000 --proj-dim=700 --num-layers=2 --activation-final --activation-type=<Sigmoid>"
train_tool=nnet-train-multistream-perutt-tgtmat
train_tool_opts="--objective-function=mse --num-streams=10"
scheduler_opts="--min-iters 15"
11 changes: 11 additions & 0 deletions idlak-egs/tts_tangle_custom/s2/conf/full-nn-splice5.conf
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
hid_layers=3
insplice=5
splice=0
delta_opts="--delta-order=2"
proto_opts="--activation-final=<Sigmoid> --no-softmax --activation-type=<Sigmoid>"
learn_rate=0.02
minmax_opts=true
momentum=0.2
hid_dim=700
halving_factor=0.75
max_iters=50
3 changes: 3 additions & 0 deletions idlak-egs/tts_tangle_custom/s2/conf/mcep-48k.conf
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
srate=48000
order=60
alpha=0.55
3 changes: 3 additions & 0 deletions idlak-egs/tts_tangle_custom/s2/conf/mcep.conf
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
srate=16000
order=39
alpha=0.42
4 changes: 4 additions & 0 deletions idlak-egs/tts_tangle_custom/s2/conf/mfcc-48k.conf
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
--use-energy=false
--frame-shift=5
--snip-edges=false
--sample-frequency=48000
3 changes: 3 additions & 0 deletions idlak-egs/tts_tangle_custom/s2/conf/mfcc.conf
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
--use-energy=false
--frame-shift=5
--snip-edges=false
4 changes: 4 additions & 0 deletions idlak-egs/tts_tangle_custom/s2/conf/pitch-48k.conf
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
--snip-edges=false
--frame-shift=5
--sample-frequency=48000
--frame-length=30
13 changes: 13 additions & 0 deletions idlak-egs/tts_tangle_custom/s2/conf/pitch-lstm-splice5.conf
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
hid_layers=3
insplice=
splice=0
delta_opts="--delta-order=2"
minmax_opts=true
learn_rate=0.0005
hid_dim=100
network_type=lstm
momentum=0.8
proto_opts="--no-softmax --cell-dim=100 --proj-dim=50 --num-layers=2 --activation-final --activation-type=<Sigmoid>"
train_tool=nnet-train-multistream-perutt-tgtmat
train_tool_opts="--objective-function=mse --num-streams=10"
scheduler_opts="--min-iters 15"
22 changes: 22 additions & 0 deletions idlak-egs/tts_tangle_custom/s2/conf/pitch-nn-splice5.conf
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
hid_layers=3
insplice=0
splice=0
delta_opts="--delta-order=2"
minmax_opts=true
#cmvn_opts="--norm-means=true --norm-vars=true"
proto_opts="--activation-final=<Sigmoid> --no-softmax --activation-type=<Sigmoid>"
learn_rate=0.02
momentum=0.2
hid_dim=128
halving_factor=0.5
max_iters=50

#cmvn_opts="--norm-means=false --norm-vars=true"
#minmax_opts=
#global_cmvn=true
#proto_opts="--no-softmax --activation-type=<Sigmoid>"
#learn_rate=0.02
#momentum=0.2
#hid_dim=100
#halving_factor=0.75
#max_iters=50
4 changes: 4 additions & 0 deletions idlak-egs/tts_tangle_custom/s2/conf/pitch.conf
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
--snip-edges=false
--frame-shift=5
--sample-frequency=16000
--frame-length=30
4 changes: 4 additions & 0 deletions idlak-egs/tts_tangle_custom/s2/conf/pretrain.conf
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
splice=4
delta_order=2
apply_cmvn=true
copy_feats=true
81 changes: 81 additions & 0 deletions idlak-egs/tts_tangle_custom/s2/local/apply_map.pl
Original file line number Diff line number Diff line change
@@ -0,0 +1,81 @@
#!/usr/bin/perl -w
# Copyright 2012 Johns Hopkins University (Author: Daniel Povey)
# Apache 2.0.

# This program is a bit like ./sym2int.pl in that it applies a map
# to things in a file, but it's a bit more general in that it doesn't
# assume the things being mapped to are single tokens, they could
# be sequences of tokens.

# Usage: apply_map.pl [-f <field-range>] map <input >output
#
# The program takes exactly one positional argument, the map file.  The text
# to be mapped is read from the standard input and the result is written to
# the standard output.  Each line of the map file must have one or more
# fields and is interpreted as a map from the first field (a string) to the
# list of the remaining fields (which may be empty).
# If the input has as one of its lines
#   A x y
# and the map file has the lines
#   x P
#   y Q R
# then, when run with "-f 2-", the output of this program will be
#   A P Q R
#
# Without -f every field of every input line is mapped; with -f only the
# fields in the given (one-based, inclusive) range are mapped and the other
# fields are copied through unchanged.
#
# Note that if a field selected for mapping does not appear as the first
# field of some line of the map file, the program dies with an error; it does
# not map the field to the empty string or skip the line.

use strict;
use warnings;

# Zero-based, inclusive bounds of the fields to map.  An undefined bound
# means "no limit on that side"; both undefined means "map every field".
my $field_begin;
my $field_end;

if (@ARGV > 0 && $ARGV[0] eq "-f") {
  shift @ARGV;
  my $field_spec = shift @ARGV;
  defined $field_spec
    or die "apply_map.pl: -f option requires a field-range argument\n";
  if ($field_spec =~ m/^\d+$/) {
    # A single field number, e.g. "3".
    $field_begin = $field_spec - 1;
    $field_end = $field_spec - 1;
  }
  if ($field_spec =~ m/^(\d*)[-:](\d*)/) { # accept e.g. 1:10 as a courtesy (properly, 1-10)
    if ($1 ne "") {
      $field_begin = $1 - 1; # Change to zero-based indexing.
    }
    if ($2 ne "") {
      $field_end = $2 - 1; # Change to zero-based indexing.
    }
  }
  if (!defined $field_begin && !defined $field_end) {
    die "Bad argument to -f option: $field_spec";
  }
}


if (@ARGV != 1) {
  print STDERR "Usage: apply_map.pl [options] map <input >output\n" .
    "options: [-f <field-range> ]\n" .
    "note: <field-range> can look like 4-5, or 4-, or 5-, or 1.\n" .
    "e.g.: echo A B | apply_map.pl a.txt\n" .
    "where a.txt is:\n" .
    "A a1 a2\n" .
    "B b\n" .
    "will produce:\n" .
    "a1 a2 b\n";
  exit(1);
}

# Load the map: first field of each line is the key, the rest (joined by
# single spaces) is the replacement text.  Later duplicates of a key win.
my ($map_file) = @ARGV;
# Three-argument open: the map name is always treated as a literal path and
# can never be interpreted as an open mode.
open(my $map_fh, '<', $map_file)
  or die "Error opening map file $map_file: $!";

my %map;
while (my $map_line = <$map_fh>) {
  my @fields = split(" ", $map_line);
  @fields >= 1 or die "apply_map.pl: empty line.";
  my $key = shift @fields;
  $map{$key} = join(" ", @fields);
}
close($map_fh);

# Rewrite the standard input line by line, replacing each selected field by
# its mapped value.
while (my $line = <STDIN>) {
  my @fields = split(" ", $line);
  for my $x (0 .. $#fields) {
    next if defined $field_begin && $x < $field_begin;
    next if defined $field_end && $x > $field_end;
    my $key = $fields[$x];
    exists $map{$key} or die "apply_map.pl: undefined key $key\n";
    $fields[$x] = $map{$key};
  }
  print join(" ", @fields) . "\n";
}

0 comments on commit cd0581f

Please sign in to comment.