4 changes: 4 additions & 0 deletions egs/ami/s5b/local/chain/tuning/run_tdnn_1f.sh
@@ -3,6 +3,10 @@
# same as 1e but uses batchnorm components instead of renorm; also adds
# proportional-shrink 10, and trains for 4 epochs


# local/chain/tuning/run_tdnn_1f.sh --mic sdm1 --use-ihm-ali true --train-set train_cleaned --gmm tri3_cleaned

# local/chain/compare_wer_general.sh sdm1 tdnn1e_sp_bi_ihmali tdnn1f_sp_bi_ihmali
# System tdnn1e_sp_bi_ihmali tdnn1f_sp_bi_ihmali
# WER on dev 39.2 37.5
# WER on eval 42.8 41.3
4 changes: 4 additions & 0 deletions egs/ami/s5b/local/chain/tuning/run_tdnn_1g.sh
@@ -3,6 +3,10 @@
# same as 1e but uses batchnorm components instead of renorm; also adds
# proportional-shrink 10, and trains for 6 epochs


# local/chain/tuning/run_tdnn_1g.sh --mic sdm1 --use-ihm-ali true --train-set train_cleaned --gmm tri3_cleaned

# local/chain/compare_wer_general.sh sdm1 tdnn1e_sp_bi_ihmali tdnn1g_sp_bi_ihmali
# System tdnn1e_sp_bi_ihmali tdnn1g_sp_bi_ihmali
# WER on dev 39.2 36.9
# WER on eval 42.8 41.0
33 changes: 18 additions & 15 deletions egs/ami/s5b/local/chain/tuning/run_tdnn_lstm_1n.sh
@@ -4,13 +4,16 @@
# and using proportional-shrink with value 10; this model uses
# 5 epochs to train

# local/chain/tuning/run_tdnn_lstm_1n.sh --mic sdm1 --use-ihm-ali true --train-set train_cleaned --gmm tri3_cleaned
# local/chain/compare_wer_general.sh sdm1 tdnn_lstmli_5epoch_sp_bi_ihmali_ld5 tdnn_lstm1n_sp_bi_ihmali_ld5
# System tdnn_lstmli_5epoch_sp_bi_ihmali_ld5 tdnn_lstm1n_sp_bi_ihmali_ld5
-# WER on dev 36.9 34.6
-# WER on eval 40.4 37.9
-# Final train prob -0.0867643 -0.134102
-# Final valid prob -0.266945 -0.234435
-# Final train prob (xent) -1.22333 -1.52368
-# Final valid prob (xent) -2.13335 -2.02384
+# WER on dev 36.9 34.2
+# WER on eval 40.4 37.7
+# Final train prob -0.0867643 -0.132712
+# Final valid prob -0.266945 -0.234348
+# Final train prob (xent) -1.22333 -1.5112
+# Final valid prob (xent) -2.13335 -2.01698



set -e -o pipefail
@@ -188,19 +191,19 @@ if [ $stage -le 15 ]; then
fixed-affine-layer name=lda input=Append(-1,0,1,ReplaceIndex(ivector, t, 0)) affine-transform-file=$dir/configs/lda.mat

# the first splicing is moved before the lda layer, so no splicing here
-relu-renorm-layer name=tdnn1 dim=1024
-relu-renorm-layer name=tdnn2 input=Append(-1,0,1) dim=1024
-relu-renorm-layer name=tdnn3 input=Append(-1,0,1) dim=1024
+relu-batchnorm-layer name=tdnn1 dim=1024
+relu-batchnorm-layer name=tdnn2 input=Append(-1,0,1) dim=1024
+relu-batchnorm-layer name=tdnn3 input=Append(-1,0,1) dim=1024

# check steps/libs/nnet3/xconfig/lstm.py for the other options and defaults
lstmp-layer name=lstm1 cell-dim=1024 recurrent-projection-dim=256 non-recurrent-projection-dim=256 delay=-3
-relu-renorm-layer name=tdnn4 input=Append(-3,0,3) dim=1024
-relu-renorm-layer name=tdnn5 input=Append(-3,0,3) dim=1024
-relu-renorm-layer name=tdnn6 input=Append(-3,0,3) dim=1024
+relu-batchnorm-layer name=tdnn4 input=Append(-3,0,3) dim=1024
+relu-batchnorm-layer name=tdnn5 input=Append(-3,0,3) dim=1024
+relu-batchnorm-layer name=tdnn6 input=Append(-3,0,3) dim=1024
lstmp-layer name=lstm2 cell-dim=1024 recurrent-projection-dim=256 non-recurrent-projection-dim=256 delay=-3
-relu-renorm-layer name=tdnn7 input=Append(-3,0,3) dim=1024
-relu-renorm-layer name=tdnn8 input=Append(-3,0,3) dim=1024
-relu-renorm-layer name=tdnn9 input=Append(-3,0,3) dim=1024
+relu-batchnorm-layer name=tdnn7 input=Append(-3,0,3) dim=1024
+relu-batchnorm-layer name=tdnn8 input=Append(-3,0,3) dim=1024
+relu-batchnorm-layer name=tdnn9 input=Append(-3,0,3) dim=1024
lstmp-layer name=lstm3 cell-dim=1024 recurrent-projection-dim=256 non-recurrent-projection-dim=256 delay=-3

## adding the layers for chain branch
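Aside (not from the PR): the renorm-to-batchnorm change in this file is mechanical; the topology, dimensions, and splicing are untouched. A minimal sketch, if you wanted to apply the same edit to a similar tuning script in place:

sed -i 's/relu-renorm-layer/relu-batchnorm-layer/g' local/chain/tuning/run_tdnn_lstm_1n.sh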
134 changes: 134 additions & 0 deletions egs/cifar/v1/local/nnet3/run_cnn_aug_1d.sh
@@ -0,0 +1,134 @@
#!/bin/bash


# 1d is as 1c but setting num-minibatches-history=40.0.
# A bit better.

# local/nnet3/compare.sh exp/cnn_aug_1c_cifar10 exp/cnn_aug_1d_cifar10
# System cnn_aug_1c_cifar10 cnn_aug_1d_cifar10
# final test accuracy: 0.8834 0.8857
# final train accuracy: 0.9644 0.9626
# final test objf: -0.362241 -0.356861
# final train objf: -0.114712 -0.114144
# num-parameters: 2205290 2205290

# local/nnet3/compare.sh exp/cnn_aug_1c_cifar100 exp/cnn_aug_1d_cifar100
# System cnn_aug_1c_cifar100 cnn_aug_1d_cifar100
# final test accuracy: 0.6219 0.6237
# final train accuracy: 0.8634 0.8688
# final test objf: -1.42399 -1.40784
# final train objf: -0.493349 -0.482047
# num-parameters: 2251460 2251460


# steps/info/nnet3_dir_info.pl exp/cnn_aug_1d_cifar10{,0}
# exp/cnn_aug_1d_cifar10: num-iters=200 nj=1..2 num-params=2.2M dim=96->10 combine=-0.24->-0.23 loglike:train/valid[132,199,final]=(-0.172,-0.114,-0.114/-0.38,-0.36,-0.36) accuracy:train/valid[132,199,final]=(0.938,0.963,0.963/0.879,0.887,0.886)
# exp/cnn_aug_1d_cifar100: num-iters=200 nj=1..2 num-params=2.3M dim=96->100 combine=-0.90->-0.92 loglike:train/valid[132,199,final]=(-0.63,-0.48,-0.48/-1.43,-1.41,-1.41) accuracy:train/valid[132,199,final]=(0.821,0.868,0.869/0.61,0.62,0.62)
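For readers new to nnet3_dir_info.pl summaries (our reading, hedged): dim=96->10 gives the input/output dimensions, nj=1..2 the number of parallel jobs ramped up over training, combine=a->b the objective before and after final model combination, and the loglike and accuracy fields report train/valid values at the bracketed iterations (here 132, 199, and the final combined model).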

# Set -e here so that we catch if any executable fails immediately
set -euo pipefail



# training options
stage=0
train_stage=-10
dataset=cifar10
srand=0
reporting_email=
affix=_aug_1d


# End configuration section.
echo "$0 $@" # Print the command line for logging

. ./cmd.sh
. ./path.sh
. ./utils/parse_options.sh

if ! cuda-compiled; then
cat <<EOF && exit 1
This script is intended to be used with GPUs but you have not compiled Kaldi with CUDA
If you want to use GPUs (and have them), go to src/, and configure and make on a machine
where "nvcc" is installed.
EOF
fi



dir=exp/cnn${affix}_${dataset}

egs=exp/${dataset}_egs

if [ ! -d $egs ]; then
echo "$0: expected directory $egs to exist. Run the get_egs.sh commands in the"
echo " run.sh before this script."
exit 1
fi

# check that the expected files are in the egs directory.

for f in $egs/egs.1.ark $egs/train_diagnostic.egs $egs/valid_diagnostic.egs $egs/combine.egs \
$egs/info/feat_dim $egs/info/left_context $egs/info/right_context \
$egs/info/output_dim; do
if [ ! -e $f ]; then
echo "$0: expected file $f to exist."
exit 1;
fi
done


mkdir -p $dir/log


if [ $stage -le 1 ]; then
mkdir -p $dir
echo "$0: creating neural net configs using the xconfig parser";

num_targets=$(cat $egs/info/output_dim)

# Note: we hardcode in the CNN config that we are dealing with 32x32x3 color
# images.

a="num-minibatches-history=40.0"
common1="$a required-time-offsets=0 height-offsets=-1,0,1 num-filters-out=32"
common2="$a required-time-offsets=0 height-offsets=-1,0,1 num-filters-out=64"

mkdir -p $dir/configs
cat <<EOF > $dir/configs/network.xconfig
input dim=96 name=input
conv-relu-batchnorm-layer name=cnn1 height-in=32 height-out=32 time-offsets=-1,0,1 $common1
conv-relu-batchnorm-dropout-layer name=cnn2 height-in=32 height-out=16 time-offsets=-1,0,1 dropout-proportion=0.25 $common1 height-subsample-out=2
conv-relu-batchnorm-layer name=cnn3 height-in=16 height-out=16 time-offsets=-2,0,2 $common2
conv-relu-batchnorm-dropout-layer name=cnn4 height-in=16 height-out=8 time-offsets=-2,0,2 dropout-proportion=0.25 $common2 height-subsample-out=2
conv-relu-batchnorm-layer name=cnn5 height-in=8 height-out=8 time-offsets=-4,0,4 $common2
relu-dropout-layer name=fully_connected1 input=Append(2,6,10,14,18,22,26,30) dropout-proportion=0.5 dim=512
output-layer name=output dim=$num_targets
EOF
steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig --config-dir $dir/configs/
fi


if [ $stage -le 2 ]; then

steps/nnet3/train_raw_dnn.py --stage=$train_stage \
--cmd="$train_cmd" \
--image.augmentation-opts="--horizontal-flip-prob=0.5 --horizontal-shift=0.1 --vertical-shift=0.1 --num-channels=3" \
--trainer.srand=$srand \
--trainer.max-param-change=2.0 \
--trainer.num-epochs=100 \
--egs.frames-per-eg=1 \
--trainer.optimization.num-jobs-initial=1 \
--trainer.optimization.num-jobs-final=2 \
--trainer.optimization.initial-effective-lrate=0.003 \
--trainer.optimization.final-effective-lrate=0.0001 \
--trainer.optimization.minibatch-size=256,128,64 \
--trainer.shuffle-buffer-size=2000 \
--egs.dir="$egs" \
--use-gpu=true \
--reporting.email="$reporting_email" \
--dir=$dir || exit 1;
fi


exit 0;
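A typical invocation, mirroring the header comparisons (the egs directory must already have been dumped by the get_egs.sh steps in run.sh):

local/nnet3/run_cnn_aug_1d.sh --dataset cifar100
local/nnet3/compare.sh exp/cnn_aug_1c_cifar100 exp/cnn_aug_1d_cifar100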
144 changes: 144 additions & 0 deletions egs/cifar/v1/local/nnet3/run_resnet_1c.sh
@@ -0,0 +1,144 @@
#!/bin/bash

# 1c is as 1b but setting num-minibatches-history=40.0 in the configs,
# so the Fisher matrix estimates change less fast.
# Seems to be helpful.
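Aside (a sketch of our understanding, not text from the PR): Kaldi's online natural gradient keeps a running Fisher-matrix estimate with an exponential forgetting factor of roughly eta = 1/num-minibatches-history, so with S_t the scatter of the current minibatch's derivatives the estimate updates approximately as

    F_t = (1 - eta) * F_{t-1} + eta * S_t

Raising num-minibatches-history to 40 averages over more minibatches, which is what "change less fast" refers to above.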

# local/nnet3/compare.sh exp/resnet1b_cifar10 exp/resnet1c_cifar10
# System resnet1b_cifar10 resnet1c_cifar10
# final test accuracy: 0.9481 0.9514
# final train accuracy: 0.9996 1
# final test objf: -0.163336 -0.157244
# final train objf: -0.00788341 -0.00751868
# num-parameters: 1322730 1322730

# local/nnet3/compare.sh exp/resnet1b_cifar100 exp/resnet1c_cifar100
# System resnet1b_cifar100 resnet1c_cifar100
# final test accuracy: 0.7602 0.7627
# final train accuracy: 0.9598 0.96
# final test objf: -0.888699 -0.862205
# final train objf: -0.164213 -0.174973
# num-parameters: 1345860 1345860
# steps/info/nnet3_dir_info.pl exp/resnet1c_cifar10{,0}
# exp/resnet1c_cifar10: num-iters=133 nj=1..2 num-params=1.3M dim=96->10 combine=-0.02->-0.01 loglike:train/valid[87,132,final]=(-0.115,-0.034,-0.0075/-0.24,-0.21,-0.157) accuracy:train/valid[87,132,final]=(0.960,0.9888,1.0000/0.925,0.938,0.951)
# exp/resnet1c_cifar100: num-iters=133 nj=1..2 num-params=1.3M dim=96->100 combine=-0.24->-0.20 loglike:train/valid[87,132,final]=(-0.75,-0.27,-0.175/-1.20,-1.00,-0.86) accuracy:train/valid[87,132,final]=(0.78,0.923,0.960/0.67,0.73,0.76)

# Set -e here so that we catch if any executable fails immediately
set -euo pipefail



# training options
stage=0
train_stage=-10
dataset=cifar10
srand=0
reporting_email=
affix=1c


# End configuration section.
echo "$0 $@" # Print the command line for logging

. ./cmd.sh
. ./path.sh
. ./utils/parse_options.sh

if ! cuda-compiled; then
cat <<EOF && exit 1
This script is intended to be used with GPUs but you have not compiled Kaldi with CUDA
If you want to use GPUs (and have them), go to src/, and configure and make on a machine
where "nvcc" is installed.
EOF
fi



dir=exp/resnet${affix}_${dataset}

egs=exp/${dataset}_egs2

if [ ! -d $egs ]; then
echo "$0: expected directory $egs to exist. Run the get_egs.sh commands in the"
echo " run.sh before this script."
exit 1
fi

# check that the expected files are in the egs directory.

for f in $egs/egs.1.ark $egs/train_diagnostic.egs $egs/valid_diagnostic.egs $egs/combine.egs \
$egs/info/feat_dim $egs/info/left_context $egs/info/right_context \
$egs/info/output_dim; do
if [ ! -e $f ]; then
echo "$0: expected file $f to exist."
exit 1;
fi
done


mkdir -p $dir/log


if [ $stage -le 1 ]; then
mkdir -p $dir
echo "$0: creating neural net configs using the xconfig parser";

num_targets=$(cat $egs/info/output_dim)

# Note: we hardcode in the CNN config that we are dealing with 32x32x3 color
# images.


nf1=48
nf2=96
nf3=256
nb3=128

a="num-minibatches-history=40.0"
common="$a required-time-offsets=0 height-offsets=-1,0,1"
res_opts="$a bypass-source=batchnorm"

mkdir -p $dir/configs
cat <<EOF > $dir/configs/network.xconfig
input dim=96 name=input
conv-layer name=conv1 $a height-in=32 height-out=32 time-offsets=-1,0,1 required-time-offsets=0 height-offsets=-1,0,1 num-filters-out=$nf1
res-block name=res2 num-filters=$nf1 height=32 time-period=1 $res_opts
res-block name=res3 num-filters=$nf1 height=32 time-period=1 $res_opts
conv-layer name=conv4 height-in=32 height-out=16 height-subsample-out=2 time-offsets=-1,0,1 $common num-filters-out=$nf2
res-block name=res5 num-filters=$nf2 height=16 time-period=2 $res_opts
res-block name=res6 num-filters=$nf2 height=16 time-period=2 $res_opts
conv-layer name=conv7 height-in=16 height-out=8 height-subsample-out=2 time-offsets=-2,0,2 $common num-filters-out=$nf3
res-block name=res8 num-filters=$nf3 num-bottleneck-filters=$nb3 height=8 time-period=4 $res_opts
res-block name=res9 num-filters=$nf3 num-bottleneck-filters=$nb3 height=8 time-period=4 $res_opts
res-block name=res10 num-filters=$nf3 num-bottleneck-filters=$nb3 height=8 time-period=4 $res_opts
channel-average-layer name=channel-average input=Append(2,6,10,14,18,22,24,28) dim=$nf3
output-layer name=output learning-rate-factor=0.1 dim=$num_targets
EOF
steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig --config-dir $dir/configs/
fi


if [ $stage -le 2 ]; then

steps/nnet3/train_raw_dnn.py --stage=$train_stage \
--cmd="$train_cmd" \
--image.augmentation-opts="--horizontal-flip-prob=0.5 --horizontal-shift=0.1 --vertical-shift=0.1 --num-channels=3" \
--trainer.srand=$srand \
--trainer.max-param-change=2.0 \
--trainer.num-epochs=100 \
--egs.frames-per-eg=1 \
--trainer.optimization.num-jobs-initial=1 \
--trainer.optimization.num-jobs-final=2 \
--trainer.optimization.initial-effective-lrate=0.003 \
--trainer.optimization.final-effective-lrate=0.0003 \
--trainer.optimization.minibatch-size=256,128,64 \
--trainer.optimization.proportional-shrink=50.0 \
--trainer.shuffle-buffer-size=2000 \
--egs.dir="$egs" \
--use-gpu=true \
--reporting.email="$reporting_email" \
--dir=$dir || exit 1;
fi


exit 0;
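Aside (our understanding of the trainer, hedged; not text from the PR): proportional-shrink scales the parameters each iteration by approximately 1 - proportional_shrink * effective_lrate. A rough sanity check for the values used here:

awk 'BEGIN { ps=50.0; printf("start: %g  end: %g\n", 1-ps*0.003, 1-ps*0.0003) }'
# start: 0.85  end: 0.985

i.e. strong shrinkage early in training, tapering off as the learning rate decays.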
5 changes: 3 additions & 2 deletions egs/svhn/v1/local/nnet3/run_cnn_aug_1a.sh
@@ -2,8 +2,9 @@

# nnet topology similar to 1a but bigger and with more epochs and data augmentation (improved 95 --> 97)

-# steps/info/nnet3_dir_info.pl exp/cnn_aug1a:
-# exp/cnn_aug1a: num-iters=300 nj=1..2 num-params=2.8M dim=96->10 combine=-0.02->-0.02 loglike:train/valid[199,299,final]=(-0.01,-0.00,-0.00/-0.17,-0.17,-0.17) accuracy:train/valid[199,299,final]=(1.00,1.00,1.00/0.97,0.97,0.97)
+# steps/info/nnet3_dir_info.pl exp/cnn_aug1a
+# exp/cnn_aug1a: num-iters=130 nj=2..4 num-params=2.8M dim=96->10 combine=-0.07->-0.06 loglike:train/valid[85,129,final]=(-0.090,-0.060,-0.054/-0.163,-0.110,-0.102) accuracy:train/valid[85,129,final]=(0.9764,0.9868,0.9886/0.958,0.9731,0.9762)

# Set -e here so that we catch if any executable fails immediately
set -euo pipefail