Merge pull request #5 from Szu-JuiChen/lmrescore

[egs] Add decode_only option for gmm and tdnn stage
Szu-JuiChen · Feb 3, 2018 · dc3289e · dc3289e
2 parents f873c36 + b9f337d
commit dc3289e
Show file tree

Hide file tree

Showing 5 changed files with 122 additions and 20 deletions.
diff --git a/egs/chime4/s5_1ch/local/chain/tuning/run_tdnn_1a.sh b/egs/chime4/s5_1ch/local/chain/tuning/run_tdnn_1a.sh
@@ -59,7 +59,7 @@ remove_egs=true
 
 #decode options
 test_online_decoding=false  # if true, it will run the last decoding stage.
-decode_only=true # if true, it wou't train a model again
+decode_only=false # if true, skip model training and only run decoding
 # End configuration section.
 echo "$0 $@"  # Print the command line for logging
 
@@ -91,12 +91,65 @@ if [ ! -d exp/tri3b_tr05_multi_${train} ]; then
   exit 1;
 fi
 
-local/nnet3/run_ivector_common.sh \
-  --stage $stage --nj $nj \
-  --train-set "$train_set" --gmm $gmm \
-  --test-sets "$test_sets" \
-  --num-threads-ubm $num_threads_ubm \
-  --nnet3-affix "$nnet3_affix"
+if $decode_only; then
+  mdir=`pwd`
+  # check ivector extractor
+  if [ ! -d $mdir/exp/nnet3${nnet3_affix}/extractor ]; then
+    echo "error, set $mdir correctly"
+    exit 1;
+  elif [ ! -d exp/nnet3${nnet3_affix}/extractor ]; then
+    echo "copy $mdir/exp/nnet3${nnet3_affix}/extractor"
+    mkdir -p exp/nnet3${nnet3_affix}
+    cp -r $mdir/exp/nnet3${nnet3_affix}/extractor exp/nnet3${nnet3_affix}/
+  fi
+  # check tdnn graph
+  if [ ! -d $mdir/exp/chain${nnet3_affix}/tree_a_sp/graph_tgpr_5k ]; then
+    echo "error, set $mdir correctly"
+    exit 1;
+  elif [ ! -d exp/chain${nnet3_affix}/tree_a_sp/graph_tgpr_5k ]; then
+    echo "copy $mdir/exp/chain${nnet3_affix}/tree_a_sp/graph_tgpr_5k"
+    mkdir -p exp/chain${nnet3_affix}/tree_a_sp
+    cp -r $mdir/exp/chain${nnet3_affix}/tree_a_sp/graph_tgpr_5k exp/chain${nnet3_affix}/tree_a_sp/
+  fi
+  # check tdnn model dir
+  if [ ! -d $mdir/exp/chain${nnet3_affix}/tdnn${affix}_sp ]; then
+    echo "error, set $mdir correctly"
+    exit 1;
+  elif [ ! -d exp/chain${nnet3_affix}/tdnn${affix}_sp ]; then
+    echo "copy $mdir/exp/chain${nnet3_affix}/tdnn${affix}_sp"
+    cp -r $mdir/exp/chain${nnet3_affix}/tdnn${affix}_sp exp/chain${nnet3_affix}/
+  fi
+  # make ivector for dev and eval
+  for datadir in ${test_sets}; do
+    utils/copy_data_dir.sh data/$datadir data/${datadir}_hires
+  done
+
+  # extracting hires features
+  for datadir in ${test_sets}; do
+    steps/make_mfcc.sh --nj $nj --mfcc-config conf/mfcc_hires.conf \
+      --cmd "$train_cmd" data/${datadir}_hires
+    steps/compute_cmvn_stats.sh data/${datadir}_hires
+    utils/fix_data_dir.sh data/${datadir}_hires
+  done
+
+  # extract iVectors for the test data, but in this case we don't need the speed
+  # perturbation (sp).
+  for data in ${test_sets}; do
+    nspk=$(wc -l <data/${data}_hires/spk2utt)
+    steps/online/nnet2/extract_ivectors_online.sh --cmd "$train_cmd" --nj "${nspk}" \
+    data/${data}_hires exp/nnet3${nnet3_affix}/extractor \
+    exp/nnet3${nnet3_affix}/ivectors_${data}_hires
+  done
+  # directly do decoding
+  stage=18
+else
+  local/nnet3/run_ivector_common.sh \
+    --stage $stage --nj $nj \
+    --train-set "$train_set" --gmm $gmm \
+    --test-sets "$test_sets" \
+    --num-threads-ubm $num_threads_ubm \
+    --nnet3-affix "$nnet3_affix"
+fi
 
 gmm_dir=exp/${gmm}
 ali_dir=exp/${gmm}_ali_${train_set}_sp
@@ -121,7 +174,7 @@ for f in $train_data_dir/feats.scp $train_ivector_dir/ivector_online.scp \
   [ ! -f $f ] && echo "$0: expected file $f to exist" && exit 1
 done
 
-if [ $stage -le 12 ] && [ ! $decode_only ]; then
+if [ $stage -le 12 ]; then
   echo "$0: creating lang directory $lang with chain-type topology"
   # Create a version of the lang/ directory that has one state per phone in the
   # topo file. [note, it really has two states.. the first one is only repeated
@@ -144,15 +197,15 @@ if [ $stage -le 12 ] && [ ! $decode_only ]; then
   fi
 fi
 
-if [ $stage -le 13 ] && [ ! $decode_only ]; then
+if [ $stage -le 13 ]; then
   # Get the alignments as lattices (gives the chain training more freedom).
   # use the same num-jobs as the alignments
   steps/align_fmllr_lats.sh --nj 100 --cmd "$train_cmd" ${lores_train_data_dir} \
     data/lang $gmm_dir $lat_dir
   rm $lat_dir/fsts.*.gz # save space
 fi
 
-if [ $stage -le 14 ] && [ ! $decode_only ]; then
+if [ $stage -le 14 ]; then
   # Build a tree using our new topology.  We know we have alignments for the
   # speed-perturbed data (local/nnet3/run_ivector_common.sh made them), so use
   # those.  The num-leaves is always somewhat less than the num-leaves from
@@ -169,7 +222,7 @@ if [ $stage -le 14 ] && [ ! $decode_only ]; then
 fi
 
 
-if [ $stage -le 15 ] && [ ! $decode_only ]; then
+if [ $stage -le 15 ]; then
   mkdir -p $dir
   echo "$0: creating neural net configs using the xconfig parser";
 
@@ -216,7 +269,7 @@ EOF
 fi
 
 
-if [ $stage -le 16 ] && [ ! $decode_only ]; then
+if [ $stage -le 16 ]; then
   if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $dir/egs/storage ]; then
     utils/create_split_dir.pl \
      /export/b0{3,4,5,6}/$USER/kaldi-data/egs/chime4-$(date +'%m_%d_%H_%M')/s5/$dir/egs/storage $dir/egs/storage

diff --git a/egs/chime4/s5_1ch/local/run_gmm.sh b/egs/chime4/s5_1ch/local/run_gmm.sh
@@ -18,6 +18,7 @@ stage=0 # resume training with --stage=N
 train=noisy # noisy data multi-condition training
 eval_flag=true # make it true when the evaluation data are released
 add_enhaced_data=true # make it true when you want to add enhanced data into training set
+decode_only=false # if true, skip model training and only run decoding
 
 . utils/parse_options.sh || exit 1;
 
@@ -50,6 +51,33 @@ if [ ! -d data/lang ]; then
   exit 1;
 fi
 
+if $decode_only; then
+  # check data/local/data
+  mdir=`pwd`
+  if [ ! -d $mdir/data/local/data ]; then
+    echo "error, set $mdir correctly"
+    exit 1;
+  elif [ ! -d data/local/data ]; then
+    echo "copy $mdir/data/local/data"
+    mkdir -p data/local
+    cp -r $mdir/data/local/data data/local/
+  fi
+  # check gmm model
+  if [ ! -d $mdir/exp/tri3b_tr05_multi_${train} ]; then
+    echo "error, set $mdir correctly"
+    exit 1;
+  elif [ ! -d exp/tri3b_tr05_multi_${train} ]; then
+    echo "copy $mdir/exp/tri3b_tr05_multi_${train}"
+    mkdir -p exp
+    cp -r $mdir/exp/tri3b_tr05_multi_${train} exp/
+  fi
+  # process for enhanced data
+  if [ ! -d data/dt05_real_$enhan ] || [ ! -d data/et05_real_$enhan ]; then
+    local/real_enhan_chime4_data_prep.sh $enhan $enhan_data
+    local/simu_enhan_chime4_data_prep.sh $enhan $enhan_data
+  fi
+  stage=6
+fi
 #######################
 #### training #########
 if [ $stage -le 1 ]; then

diff --git a/egs/chime4/s5_1ch/run.sh b/egs/chime4/s5_1ch/run.sh
@@ -22,7 +22,13 @@
 
 # Config:
 stage=0 # resume training with --stage N
-enhancement=single_BLSTMmask #### or your method 
+enhancement=single_BLSTMmask #### or your method
+# if either of the following options is true, the corresponding stage skips model training and only runs decoding
+gmm_decode_only_flag=false
+tdnn_decode_only_flag=false
+# make it true when you want to add enhanced data into training set. But please note that when changing enhancement method,
+# you may need to retrain from run_gmm.sh and avoid using decode-only options above
+add_enhaced_data_flag=true
 
 . utils/parse_options.sh || exit 1;
 
@@ -89,14 +95,15 @@ fi
 # Please set a directory of your speech enhancement method.
 # The directory structure and audio files must follow the attached baseline enhancement directory
 if [ $stage -le 3 ]; then
-  local/run_gmm.sh $enhancement $enhancement_data $chime4_data
+  local/run_gmm.sh --add-enhaced-data $add_enhaced_data_flag \
+    --decode-only $gmm_decode_only_flag $enhancement $enhancement_data $chime4_data
 fi
 
 # DNN based ASR experiment
 # Since it takes time to evaluate DNN, we make the GMM and DNN scripts separately.
 # You may execute it after you would have promising results using GMM-based ASR experiments
 if [ $stage -le 4 ]; then
-  local/chain/run_tdnn.sh $enhancement
+  local/chain/run_tdnn.sh --decode-only $tdnn_decode_only_flag $enhancement
 fi
 
 # LM-rescoring experiment with 5-gram and RNN LMs

diff --git a/egs/chime4/s5_2ch/run.sh b/egs/chime4/s5_2ch/run.sh
@@ -24,6 +24,12 @@
 # Config:
 stage=0 # resume training with --stage N
 enhancement=blstm_gev #### or your method 
+# if either of the following options is true, the corresponding stage skips model training and only runs decoding
+gmm_decode_only_flag=false
+tdnn_decode_only_flag=false
+# make it true when you want to add enhanced data into training set. But please note that when changing enhancement method,
+# you may need to retrain from run_gmm.sh and avoid using decode-only options above
+add_enhaced_data_flag=true
 
 . utils/parse_options.sh || exit 1;
 
@@ -101,14 +107,15 @@ fi
 # Please set a directory of your speech enhancement method.
 # The directory structure and audio files must follow the attached baseline enhancement directory
 if [ $stage -le 3 ]; then
-  local/run_gmm.sh $enhancement $enhancement_data $chime4_data
+  local/run_gmm.sh --add-enhaced-data $add_enhaced_data_flag \
+    --decode-only $gmm_decode_only_flag $enhancement $enhancement_data $chime4_data
 fi
 
 # DNN based ASR experiment
 # Since it takes time to evaluate DNN, we make the GMM and DNN scripts separately.
 # You may execute it after you would have promising results using GMM-based ASR experiments
 if [ $stage -le 4 ]; then
-  local/chain/run_tdnn.sh $enhancement
+  local/chain/run_tdnn.sh --decode-only $tdnn_decode_only_flag $enhancement
 fi
 
 # LM-rescoring experiment with 5-gram and RNN LMs

diff --git a/egs/chime4/s5_6ch/run.sh b/egs/chime4/s5_6ch/run.sh
@@ -21,7 +21,13 @@
 
 # Config:
 stage=0 # resume training with --stage N
-enhancement=blstm_gev #### or your method 
+enhancement=blstm_gev #### or your method
+# if either of the following options is true, the corresponding stage skips model training and only runs decoding
+gmm_decode_only_flag=false
+tdnn_decode_only_flag=false
+# make it true when you want to add enhanced data into training set. But please note that when changing enhancement method,
+# you may need to retrain from run_gmm.sh and avoid using decode-only options above
+add_enhaced_data_flag=true
 
 . utils/parse_options.sh || exit 1;
 
@@ -102,14 +108,15 @@ fi
 # Please set a directory of your speech enhancement method.
 # The directory structure and audio files must follow the attached baseline enhancement directory
 if [ $stage -le 3 ]; then
-  local/run_gmm.sh $enhancement $enhancement_data $chime4_data
+  local/run_gmm.sh --add-enhaced-data $add_enhaced_data_flag \
+    --decode-only $gmm_decode_only_flag $enhancement $enhancement_data $chime4_data
 fi
 
 # DNN based ASR experiment
 # Since it takes time to evaluate DNN, we make the GMM and DNN scripts separately.
 # You may execute it after you would have promising results using GMM-based ASR experiments
 if [ $stage -le 4 ]; then
-  local/chain/run_tdnn.sh $enhancement
+  local/chain/run_tdnn.sh --decode-only $tdnn_decode_only_flag $enhancement
 fi
 
 # LM-rescoring experiment with 5-gram and RNN LMs