Merge pull request #2 from kaldi-asr/master

pull from master

chenzhehuai committed Feb 6, 2018
2 parents 240f0e4 + 7906590 commit 60f2bcf
Showing 23 changed files with 93 additions and 70 deletions.
1 change: 1 addition & 0 deletions .gitignore
@@ -1,6 +1,7 @@
# Compiled extensionless executable files in /src/*/
# This stanza must precede wildcard patterns below!
/src/*/*
+!/src/lm/test_data/
!/src/*/?*.*
!/src/doc/*
!/src/*/Makefile
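An aside on why this one-line negation is needed (editor's note, not part of the commit): the /src/*/* wildcard above ignores the whole src/lm/test_data directory, and git cannot re-include a file whose parent directory is excluded, so the ARPA test fixture added later in this commit would be invisible without it. A quick check, assuming a Kaldi checkout:

# Prints the matching .gitignore rule if the fixture is still ignored;
# with the new negation in place it should print nothing (exit status 1).
git check-ignore -v src/lm/test_data/missing_bos.arpa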
Empty file modified egs/fisher_swbd/s5/local/chain/compare_wer_general.sh
100644 → 100755
Empty file modified egs/fisher_swbd/s5/local/chain/run_blstm_6h.sh
100644 → 100755
Empty file modified egs/fisher_swbd/s5/local/chain/run_blstm_6j.sh
100644 → 100755
Empty file modified egs/fisher_swbd/s5/local/chain/run_tdnn_7b.sh
100644 → 100755
Empty file modified egs/fisher_swbd/s5/local/chain/run_tdnn_lstm_1a.sh
100644 → 100755
Empty file modified egs/fisher_swbd/s5/local/chain/run_tdnn_lstm_1b.sh
100644 → 100755
Empty file modified egs/fisher_swbd/s5/local/chain/run_tdnn_opgru_1a.sh
100644 → 100755
Empty file modified egs/fisher_swbd/s5/local/chain/run_tdnn_opgru_1b.sh
100644 → 100755
2 changes: 0 additions & 2 deletions egs/librispeech/s5/local/chain/tuning/run_tdnn_1b.sh
@@ -173,8 +173,6 @@ if [ $stage -le 15 ]; then
/export/b0{5,6,7,8}/$USER/kaldi-data/egs/librispeech-$(date +'%m_%d_%H_%M')/s5c/$dir/egs/storage $dir/egs/storage
fi

-touch $dir/egs/.nodelete # keep egs around when that run dies.

steps/nnet3/chain/train.py --stage $train_stage \
--cmd "$decode_cmd" \
--feat.online-ivector-dir $train_ivector_dir \
45 changes: 17 additions & 28 deletions egs/swbd/s5c/local/nnet3/run_ivector_common.sh
@@ -4,49 +4,38 @@
set -e
stage=1
train_stage=-10
-generate_alignments=true # false if doing ctc training
+generate_alignments=true
speed_perturb=true

. ./path.sh
. ./utils/parse_options.sh

-mkdir -p nnet3
-# perturbed data preparation
+mkdir -p exp/nnet3
train_set=train_nodup

if [ -e data/rt03 ]; then maybe_rt03=rt03; else maybe_rt03= ; fi

if [ "$speed_perturb" == "true" ]; then
if $speed_perturb; then
if [ $stage -le 1 ]; then
-#Although the nnet will be trained by high resolution data, we still have to perturbe the normal data to get the alignment
+# Although the nnet will be trained by high resolution data, we still have to perturb the normal data to get the alignments
# _sp stands for speed-perturbed

-for datadir in train_nodup; do
-utils/perturb_data_dir_speed.sh 0.9 data/${datadir} data/temp1
-utils/perturb_data_dir_speed.sh 1.1 data/${datadir} data/temp2
-utils/combine_data.sh data/${datadir}_tmp data/temp1 data/temp2
-utils/validate_data_dir.sh --no-feats data/${datadir}_tmp
-rm -r data/temp1 data/temp2

-mfccdir=mfcc_perturbed
-steps/make_mfcc.sh --cmd "$train_cmd" --nj 50 \
-data/${datadir}_tmp exp/make_mfcc/${datadir}_tmp $mfccdir || exit 1;
-steps/compute_cmvn_stats.sh data/${datadir}_tmp exp/make_mfcc/${datadir}_tmp $mfccdir || exit 1;
-utils/fix_data_dir.sh data/${datadir}_tmp

-utils/copy_data_dir.sh --spk-prefix sp1.0- --utt-prefix sp1.0- data/${datadir} data/temp0
-utils/combine_data.sh data/${datadir}_sp data/${datadir}_tmp data/temp0
-utils/fix_data_dir.sh data/${datadir}_sp
-rm -r data/temp0 data/${datadir}_tmp
-done
+echo "$0: preparing directory for speed-perturbed data"
+utils/data/perturb_data_dir_speed_3way.sh data/${train_set} data/${train_set}_sp

+echo "$0: creating MFCC features for low-resolution speed-perturbed data"
+mfccdir=mfcc_perturbed
+steps/make_mfcc.sh --cmd "$train_cmd" --nj 50 \
+data/${train_set}_sp exp/make_mfcc/${train_set}_sp $mfccdir
+steps/compute_cmvn_stats.sh data/${train_set}_sp exp/make_mfcc/${train_set}_sp $mfccdir
+utils/fix_data_dir.sh data/${train_set}_sp
fi

-if [ $stage -le 2 ] && [ "$generate_alignments" == "true" ]; then
-#obtain the alignment of the perturbed data
+if [ $stage -le 2 ] && $generate_alignments; then
+# obtain the alignment of the perturbed data
steps/align_fmllr.sh --nj 100 --cmd "$train_cmd" \
-data/train_nodup_sp data/lang exp/tri4 exp/tri4_ali_nodup_sp || exit 1
+data/${train_set}_sp data/lang exp/tri4 exp/tri4_ali_nodup_sp
fi
-train_set=train_nodup_sp
+train_set=${train_set}_sp
fi

if [ $stage -le 3 ]; then
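An aside on the net effect of this refactor (a sketch; the directory listing is illustrative, not captured output): the old temp1/temp2/temp0 bookkeeping collapses into a single helper call that emits one combined speed-perturbed directory.

# One call writes the 0.9x, 1.0x and 1.1x copies and combines them
# into data/train_nodup_sp (the same paths the new code uses).
utils/data/perturb_data_dir_speed_3way.sh data/train_nodup data/train_nodup_sp
ls data/train_nodup_sp   # e.g. utt2spk, utt2uniq, wav.scp -- features come later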
4 changes: 2 additions & 2 deletions egs/wsj/s5/steps/cleanup/lattice_oracle_align.sh
@@ -72,9 +72,9 @@ fi
nj=$(cat $latdir/num_jobs)
oov=$(cat $lang/oov.int)

-utils/split_data.sh --per-utt $data $nj
+utils/split_data.sh $data $nj

-sdata=$data/split${nj}utt
+sdata=$data/split$nj;

if [ $stage -le 1 ]; then
$cmd JOB=1:$nj $dir/log/get_oracle.JOB.log \
4 changes: 2 additions & 2 deletions egs/wsj/s5/utils/data/get_uniform_subsegments.py
@@ -87,8 +87,8 @@ def run(args):
else:
end = end_time
new_utt = "{utt_id}-{s:08d}-{e:08d}".format(
-utt_id=utt_id, s=int(100 * (start - start_time)),
-e=int(100 * (end - start_time)))
+utt_id=utt_id, s=int(round(100 * (start - start_time))),
+e=int(round(100 * (end - start_time))))
print ("{new_utt} {utt_id} {s} {e}".format(
new_utt=new_utt, utt_id=utt_id, s=start - start_time,
e=end - start_time))
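An aside on why round() matters here (illustration only, not part of the commit): int() truncates toward zero, and the centisecond product can land just below the intended integer in IEEE-754 arithmetic, shifting a subsegment boundary by one frame.

# A 0.29 s offset: the float product falls just short of 29 centiseconds.
python3 -c 'x = 100 * 0.29; print(x, int(x), int(round(x)))'
# prints: 28.999999999999996 28 29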
12 changes: 10 additions & 2 deletions egs/wsj/s5/utils/data/perturb_data_dir_speed_3way.sh
@@ -38,9 +38,17 @@ utils/data/get_utt2dur.sh ${srcdir}

utils/data/perturb_data_dir_speed.sh 0.9 ${srcdir} ${destdir}_speed0.9 || exit 1
utils/data/perturb_data_dir_speed.sh 1.1 ${srcdir} ${destdir}_speed1.1 || exit 1
-utils/data/combine_data.sh $destdir ${srcdir} ${destdir}_speed0.9 ${destdir}_speed1.1 || exit 1

-rm -r ${destdir}_speed0.9 ${destdir}_speed1.1
+utils/copy_data_dir.sh --spk-prefix sp1.0- --utt-prefix sp1.0- ${srcdir} ${destdir}_speed1.0
+if [ ! -f $srcdir/utt2uniq ]; then
+cat $srcdir/utt2spk | awk '{printf("sp1.0-%s %s\n", $1, $1);}' > ${destdir}_speed1.0/utt2uniq
+else
+cat $srcdir/utt2uniq | awk '{printf("sp1.0-%s %s\n", $1, $2);}' > ${destdir}_speed1.0/utt2uniq
+fi

+utils/data/combine_data.sh $destdir ${destdir}_speed1.0 ${destdir}_speed0.9 ${destdir}_speed1.1 || exit 1

+rm -r ${destdir}_speed0.9 ${destdir}_speed1.1 ${destdir}_speed1.0

echo "$0: generated 3-way speed-perturbed version of data in $srcdir, in $destdir"
utils/validate_data_dir.sh --no-feats --no-text $destdir
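For clarity, here is what the new utt2uniq stanza produces for the 1.0x copy (a sketch with a made-up utterance ID): each prefixed utterance maps back to its original, so downstream tools can tell the three speed copies of a recording are the same underlying audio.

# Hypothetical utt2spk line fed through the first awk branch above:
echo "sw02001-A_000098-001156 sw02001-A" | awk '{printf("sp1.0-%s %s\n", $1, $1);}'
# -> sp1.0-sw02001-A_000098-001156 sw02001-A_000098-001156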
15 changes: 13 additions & 2 deletions src/base/io-funcs.cc
@@ -178,8 +178,14 @@ int PeekToken(std::istream &is, bool binary) {
}
int ans = is.peek();
if (read_bracket) {
-if (!is.unget())
+if (!is.unget()) {
KALDI_WARN << "Error ungetting '<' in PeekToken";
+// Clear the bad bit. It seems to be possible for this code to be
+// reached, and the C++ standard is very vague on whether even a single
+// call to unget() should succeed; see
+// http://www.cplusplus.com/reference/istream/istream/unget/
+is.clear();
+}
}
return ans;
}
@@ -197,7 +203,12 @@ void ExpectToken(std::istream &is, bool binary, const char *token) {
KALDI_ERR << "Failed to read token [started at file position "
<< pos_at_start << "], expected " << token;
}
-if (strcmp(str.c_str(), token) != 0) {
+// The second half of the '&&' expression below is so that if we're expecting
+// "<Foo>", we will accept "Foo>" instead. This is so that the model-reading
+// code will tolerate errors in PeekToken where is.unget() failed; search for
+// is.clear() in PeekToken() for an explanation.
+if (strcmp(str.c_str(), token) != 0 &&
+!(token[0] == '<' && strcmp(str.c_str(), token + 1) == 0)) {
KALDI_ERR << "Expected token \"" << token << "\", got instead \""
<< str <<"\".";
}
29 changes: 1 addition & 28 deletions src/doc/chain.dox
@@ -389,34 +389,7 @@ on the paths.
You might notice in the current example scripts that we use iVectors. We do so
just because they generally help a bit, and because the baseline setup we were
comparing with, uses them. There is no inherent connection with 'chain'
-models, and no fundamental requirement to use them. Actually we want to get rid
-of them (see below).


-\section chain_next_steps Next steps (TODOs) with 'chain' models

-(Note: this list is valid as of Dec 13 2015, but may become out of date).
-Things we need to do (and that we'd like help with) are:
-  - Supply example scripts (and tune them) on a wide range of corpora
-    (It will be interesting to see whether there are scale-dependent effects
-    affecting how well this model works).
-  - Create and tune LSTM and BLSTM versions of the training script. (This
-    may involve some playing around with learning rate schedules and
-    configurations).
-  - Figure out how to speed up the forward-backward part of the computation.
-    (E.g. using state-level pruning, or just by optimizing the current kernels or
-    data structures).

-A longer-term TODO, which Dan should do, is to create an online decoding setup
-for these models. Actually this isn't really distinct from nnet3 online
-decoding in general, since the models are no different from regular nnet3
-acoustic models. But we do have to decide whether to continue to support
-iVectors-- getting rid of them would simplify the setup considerably, and
-would hopefully make it more robust. We are hoping that with LSTMs, since it
-already sees quite a wide acoustic context, iVector adaptation will no longer
-be as helpful and could be dropped. We also have other ideas how to
-incorporate adaptation as part of the neural network, without the use of
-iVectors. This will require some experimentation.
+models, and no fundamental requirement to use them.


*/
14 changes: 14 additions & 0 deletions src/lm/arpa-lm-compiler-test.cc
@@ -204,6 +204,17 @@ bool ScoringTest(bool seps, const string &infile, const string& sentence,
return ok;
}

+bool ThrowsExceptionTest(bool seps, const string &infile) {
+  try {
+    // Make memory cleanup easy in both cases of try-catch block.
+    std::unique_ptr<ArpaLmCompiler> compiler(Compile(seps, infile));
+    return false;
+  } catch (const std::runtime_error&) {
+    // Kaldi throws only std::runtime_error in kaldi-error.cc
+    return true;
+  }
+}

} // namespace kaldi

bool RunAllTests(bool seps) {
@@ -214,6 +225,9 @@ bool RunAllTests(bool seps) {

ok &= kaldi::ScoringTest(seps, "test_data/input.arpa", "b b b a", 59.2649);
ok &= kaldi::ScoringTest(seps, "test_data/input.arpa", "a b", 4.36082);

+ok &= kaldi::ThrowsExceptionTest(seps, "test_data/missing_bos.arpa");

if (!ok) {
KALDI_WARN << "Tests " << (seps ? "with" : "without")
<< " epsilon substitution FAILED";
8 changes: 8 additions & 0 deletions src/lm/arpa-lm-compiler.cc
@@ -360,10 +360,18 @@ void ArpaLmCompiler::RemoveRedundantStates() {
<< fst_.NumStates();
}

+void ArpaLmCompiler::Check() const {
+  if (fst_.Start() == fst::kNoStateId) {
+    KALDI_ERR << "Arpa file did not contain the beginning-of-sentence symbol "
+              << Symbols()->Find(Options().bos_symbol) << ".";
+  }
+}

void ArpaLmCompiler::ReadComplete() {
fst_.SetInputSymbols(Symbols());
fst_.SetOutputSymbols(Symbols());
RemoveRedundantStates();
+Check();
}

} // namespace kaldi
1 change: 1 addition & 0 deletions src/lm/arpa-lm-compiler.h
@@ -52,6 +52,7 @@ class ArpaLmCompiler : public ArpaFileParser {
// this function removes states that only have a backoff arc coming
// out of them.
void RemoveRedundantStates();
+void Check() const;

int sub_eps_;
ArpaLmCompilerImplInterface* impl_; // Owned.
18 changes: 18 additions & 0 deletions src/lm/test_data/missing_bos.arpa
@@ -0,0 +1,18 @@

\data\
ngram 1=3
ngram 2=1
ngram 3=1

\1-grams:
-5.234679 a -3.3
-3.456783 b -3.0
-4.333333 </s>

\2-grams:
-1.45678 a b -3.23

\3-grams:
-0.23940 a b </s>

\end\
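Since the fixture above has no <s> unigram, compiling it should now abort in ArpaLmCompiler::Check() rather than produce an FST with no start state. A manual reproduction, sketched under the assumption that compiled Kaldi binaries are on PATH (arpa2fst is the command-line wrapper around ArpaLmCompiler):

# Should exit nonzero with a KALDI_ERR along the lines of
# "Arpa file did not contain the beginning-of-sentence symbol <s>."
arpa2fst src/lm/test_data/missing_bos.arpa /dev/null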
3 changes: 2 additions & 1 deletion src/nnet3/nnet-chain-training.cc
@@ -37,7 +37,8 @@ NnetChainTrainer::NnetChainTrainer(const NnetChainTrainingOptions &opts,
if (opts.nnet_config.zero_component_stats)
ZeroComponentStats(nnet);
KALDI_ASSERT(opts.nnet_config.momentum >= 0.0 &&
-opts.nnet_config.max_param_change >= 0.0);
+opts.nnet_config.max_param_change >= 0.0 &&
+opts.nnet_config.backstitch_training_interval > 0);
delta_nnet_ = nnet_->Copy();
ScaleNnet(0.0, delta_nnet_);
const int32 num_updatable = NumUpdatableComponents(*delta_nnet_);
4 changes: 2 additions & 2 deletions src/nnet3/nnet-simple-component.cc
@@ -2854,8 +2854,8 @@ void NaturalGradientAffineComponent::Read(std::istream &is, bool binary) {
}
std::string token;
ReadToken(is, binary, &token);
if (token != "<NaturalGradientAffineComponent>" &&
token != "</NaturalGradientAffineComponent>")
// the following has to handle a couple variants of
if (token.find("NaturalGradientAffineComponent>") == std::string::npos)
KALDI_ERR << "Expected <NaturalGradientAffineComponent> or "
<< "</NaturalGradientAffineComponent>, got " << token;
SetNaturalGradientConfigs();
3 changes: 2 additions & 1 deletion src/nnet3/nnet-training.cc
@@ -34,7 +34,8 @@ NnetTrainer::NnetTrainer(const NnetTrainerOptions &config,
if (config.zero_component_stats)
ZeroComponentStats(nnet);
KALDI_ASSERT(config.momentum >= 0.0 &&
-config.max_param_change >= 0.0);
+config.max_param_change >= 0.0 &&
+config.backstitch_training_interval > 0);
delta_nnet_ = nnet_->Copy();
ScaleNnet(0.0, delta_nnet_);
const int32 num_updatable = NumUpdatableComponents(*delta_nnet_);
