4 changes: 4 additions & 0 deletions egs/ami/s5b/local/chain/tuning/run_tdnn_1f.sh
@@ -3,6 +3,10 @@
# same as 1e but uses batchnorm components instead of renorm; also adds
# proportional-shrink 10, and trains for 4 epochs


# local/chain/tuning/run_tdnn_1f.sh --mic sdm1 --use-ihm-ali true --train-set train_cleaned --gmm tri3_cleaned

# local/chain/compare_wer_general.sh sdm1 tdnn1e_sp_bi_ihmali tdnn1f_sp_bi_ihmali
# System tdnn1e_sp_bi_ihmali tdnn1f_sp_bi_ihmali
# WER on dev 39.2 37.5
# WER on eval 42.8 41.3
4 changes: 4 additions & 0 deletions egs/ami/s5b/local/chain/tuning/run_tdnn_1g.sh
@@ -3,6 +3,10 @@
# same as 1e but uses batchnorm components instead of renorm; also adds
# proportional-shrink 10, and trains for 6 epochs


# local/chain/tuning/run_tdnn_1g.sh --mic sdm1 --use-ihm-ali true --train-set train_cleaned --gmm tri3_cleaned

# local/chain/compare_wer_general.sh sdm1 tdnn1e_sp_bi_ihmali tdnn1g_sp_bi_ihmali
# System tdnn1e_sp_bi_ihmali tdnn1g_sp_bi_ihmali
# WER on dev 39.2 36.9
# WER on eval 42.8 41.0
33 changes: 18 additions & 15 deletions egs/ami/s5b/local/chain/tuning/run_tdnn_lstm_1n.sh
@@ -4,13 +4,16 @@
# and using proportional-shrink with value 10; this model uses
# 5 epochs to train

# local/chain/tuning/run_tdnn_lstm_1n.sh --mic sdm1 --use-ihm-ali true --train-set train_cleaned --gmm tri3_cleaned
# local/chain/compare_wer_general.sh sdm1 tdnn_lstmli_5epoch_sp_bi_ihmali_ld5 tdnn_lstm1n_sp_bi_ihmali_ld5
# System tdnn_lstmli_5epoch_sp_bi_ihmali_ld5 tdnn_lstm1n_sp_bi_ihmali_ld5
-# WER on dev 36.9 34.6
-# WER on eval 40.4 37.9
-# Final train prob -0.0867643 -0.134102
-# Final valid prob -0.266945 -0.234435
-# Final train prob (xent) -1.22333 -1.52368
-# Final valid prob (xent) -2.13335 -2.02384
+# WER on dev 36.9 34.2
+# WER on eval 40.4 37.7
+# Final train prob -0.0867643 -0.132712
+# Final valid prob -0.266945 -0.234348
+# Final train prob (xent) -1.22333 -1.5112
+# Final valid prob (xent) -2.13335 -2.01698



set -e -o pipefail
@@ -188,19 +191,19 @@ if [ $stage -le 15 ]; then
fixed-affine-layer name=lda input=Append(-1,0,1,ReplaceIndex(ivector, t, 0)) affine-transform-file=$dir/configs/lda.mat

# the first splicing is moved before the lda layer, so no splicing here
-relu-renorm-layer name=tdnn1 dim=1024
-relu-renorm-layer name=tdnn2 input=Append(-1,0,1) dim=1024
-relu-renorm-layer name=tdnn3 input=Append(-1,0,1) dim=1024
+relu-batchnorm-layer name=tdnn1 dim=1024
+relu-batchnorm-layer name=tdnn2 input=Append(-1,0,1) dim=1024
+relu-batchnorm-layer name=tdnn3 input=Append(-1,0,1) dim=1024

# check steps/libs/nnet3/xconfig/lstm.py for the other options and defaults
lstmp-layer name=lstm1 cell-dim=1024 recurrent-projection-dim=256 non-recurrent-projection-dim=256 delay=-3
-relu-renorm-layer name=tdnn4 input=Append(-3,0,3) dim=1024
-relu-renorm-layer name=tdnn5 input=Append(-3,0,3) dim=1024
-relu-renorm-layer name=tdnn6 input=Append(-3,0,3) dim=1024
+relu-batchnorm-layer name=tdnn4 input=Append(-3,0,3) dim=1024
+relu-batchnorm-layer name=tdnn5 input=Append(-3,0,3) dim=1024
+relu-batchnorm-layer name=tdnn6 input=Append(-3,0,3) dim=1024
lstmp-layer name=lstm2 cell-dim=1024 recurrent-projection-dim=256 non-recurrent-projection-dim=256 delay=-3
-relu-renorm-layer name=tdnn7 input=Append(-3,0,3) dim=1024
-relu-renorm-layer name=tdnn8 input=Append(-3,0,3) dim=1024
-relu-renorm-layer name=tdnn9 input=Append(-3,0,3) dim=1024
+relu-batchnorm-layer name=tdnn7 input=Append(-3,0,3) dim=1024
+relu-batchnorm-layer name=tdnn8 input=Append(-3,0,3) dim=1024
+relu-batchnorm-layer name=tdnn9 input=Append(-3,0,3) dim=1024
lstmp-layer name=lstm3 cell-dim=1024 recurrent-projection-dim=256 non-recurrent-projection-dim=256 delay=-3

## adding the layers for chain branch
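Aside (not from the PR): the renorm-to-batchnorm change in this file is mechanical; the topology, dimensions, and splicing are untouched. A minimal sketch, if you wanted to apply the same edit to a similar tuning script in place:

sed -i 's/relu-renorm-layer/relu-batchnorm-layer/g' local/chain/tuning/run_tdnn_lstm_1n.sh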
134 changes: 134 additions & 0 deletions egs/cifar/v1/local/nnet3/run_cnn_aug_1d.sh
@@ -0,0 +1,134 @@
#!/bin/bash


# 1d is as 1c but setting num-minibatches-history=40.0.
# A bit better.

# local/nnet3/compare.sh exp/cnn_aug_1c_cifar10 exp/cnn_aug_1d_cifar10
# System cnn_aug_1c_cifar10 cnn_aug_1d_cifar10
# final test accuracy: 0.8834 0.8857
# final train accuracy: 0.9644 0.9626
# final test objf: -0.362241 -0.356861
# final train objf: -0.114712 -0.114144
# num-parameters: 2205290 2205290

# local/nnet3/compare.sh exp/cnn_aug_1c_cifar100 exp/cnn_aug_1d_cifar100
# System cnn_aug_1c_cifar100 cnn_aug_1d_cifar100
# final test accuracy: 0.6219 0.6237
# final train accuracy: 0.8634 0.8688
# final test objf: -1.42399 -1.40784
# final train objf: -0.493349 -0.482047
# num-parameters: 2251460 2251460


# steps/info/nnet3_dir_info.pl exp/cnn_aug_1d_cifar10{,0}
# exp/cnn_aug_1d_cifar10: num-iters=200 nj=1..2 num-params=2.2M dim=96->10 combine=-0.24->-0.23 loglike:train/valid[132,199,final]=(-0.172,-0.114,-0.114/-0.38,-0.36,-0.36) accuracy:train/valid[132,199,final]=(0.938,0.963,0.963/0.879,0.887,0.886)
# exp/cnn_aug_1d_cifar100: num-iters=200 nj=1..2 num-params=2.3M dim=96->100 combine=-0.90->-0.92 loglike:train/valid[132,199,final]=(-0.63,-0.48,-0.48/-1.43,-1.41,-1.41) accuracy:train/valid[132,199,final]=(0.821,0.868,0.869/0.61,0.62,0.62)
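For readers new to nnet3_dir_info.pl summaries (our reading, hedged): dim=96->10 gives the input/output dimensions, nj=1..2 the number of parallel jobs ramped up over training, combine=a->b the objective before and after final model combination, and the loglike and accuracy fields report train/valid values at the bracketed iterations (here 132, 199, and the final combined model).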

# Set -e here so that we catch if any executable fails immediately
set -euo pipefail



# training options
stage=0
train_stage=-10
dataset=cifar10
srand=0
reporting_email=
affix=_aug_1d


# End configuration section.
echo "$0 $@" # Print the command line for logging

. ./cmd.sh
. ./path.sh
. ./utils/parse_options.sh

if ! cuda-compiled; then
cat <<EOF && exit 1
This script is intended to be used with GPUs but you have not compiled Kaldi with CUDA
If you want to use GPUs (and have them), go to src/, and configure and make on a machine
where "nvcc" is installed.
EOF
fi



dir=exp/cnn${affix}_${dataset}

egs=exp/${dataset}_egs

if [ ! -d $egs ]; then
echo "$0: expected directory $egs to exist. Run the get_egs.sh commands in the"
echo " run.sh before this script."
exit 1
fi

# check that the expected files are in the egs directory.

for f in $egs/egs.1.ark $egs/train_diagnostic.egs $egs/valid_diagnostic.egs $egs/combine.egs \
$egs/info/feat_dim $egs/info/left_context $egs/info/right_context \
$egs/info/output_dim; do
if [ ! -e $f ]; then
echo "$0: expected file $f to exist."
exit 1;
fi
done


mkdir -p $dir/log


if [ $stage -le 1 ]; then
mkdir -p $dir
echo "$0: creating neural net configs using the xconfig parser";

num_targets=$(cat $egs/info/output_dim)

# Note: we hardcode in the CNN config that we are dealing with 32x32x3 color
# images.

a="num-minibatches-history=40.0"
common1="$a required-time-offsets=0 height-offsets=-1,0,1 num-filters-out=32"
common2="$a required-time-offsets=0 height-offsets=-1,0,1 num-filters-out=64"

mkdir -p $dir/configs
cat <<EOF > $dir/configs/network.xconfig
input dim=96 name=input
conv-relu-batchnorm-layer name=cnn1 height-in=32 height-out=32 time-offsets=-1,0,1 $common1
conv-relu-batchnorm-dropout-layer name=cnn2 height-in=32 height-out=16 time-offsets=-1,0,1 dropout-proportion=0.25 $common1 height-subsample-out=2
conv-relu-batchnorm-layer name=cnn3 height-in=16 height-out=16 time-offsets=-2,0,2 $common2
conv-relu-batchnorm-dropout-layer name=cnn4 height-in=16 height-out=8 time-offsets=-2,0,2 dropout-proportion=0.25 $common2 height-subsample-out=2
conv-relu-batchnorm-layer name=cnn5 height-in=8 height-out=8 time-offsets=-4,0,4 $common2
relu-dropout-layer name=fully_connected1 input=Append(2,6,10,14,18,22,26,30) dropout-proportion=0.5 dim=512
output-layer name=output dim=$num_targets
EOF
steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig --config-dir $dir/configs/
fi


if [ $stage -le 2 ]; then

steps/nnet3/train_raw_dnn.py --stage=$train_stage \
--cmd="$train_cmd" \
--image.augmentation-opts="--horizontal-flip-prob=0.5 --horizontal-shift=0.1 --vertical-shift=0.1 --num-channels=3" \
--trainer.srand=$srand \
--trainer.max-param-change=2.0 \
--trainer.num-epochs=100 \
--egs.frames-per-eg=1 \
--trainer.optimization.num-jobs-initial=1 \
--trainer.optimization.num-jobs-final=2 \
--trainer.optimization.initial-effective-lrate=0.003 \
--trainer.optimization.final-effective-lrate=0.0001 \
--trainer.optimization.minibatch-size=256,128,64 \
--trainer.shuffle-buffer-size=2000 \
--egs.dir="$egs" \
--use-gpu=true \
--reporting.email="$reporting_email" \
--dir=$dir || exit 1;
fi


exit 0;
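A typical invocation, mirroring the header comparisons (the egs directory must already have been dumped by the get_egs.sh steps in run.sh):

local/nnet3/run_cnn_aug_1d.sh --dataset cifar100
local/nnet3/compare.sh exp/cnn_aug_1c_cifar100 exp/cnn_aug_1d_cifar100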
144 changes: 144 additions & 0 deletions egs/cifar/v1/local/nnet3/run_resnet_1c.sh
@@ -0,0 +1,144 @@
#!/bin/bash

# 1c is as 1b but setting num-minibatches-history=40.0 in the configs,
# so the Fisher matrix estimates change less fast.
# Seems to be helpful.
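Aside (a sketch of our understanding, not text from the PR): Kaldi's online natural gradient keeps a running Fisher-matrix estimate with an exponential forgetting factor of roughly eta = 1/num-minibatches-history, so with S_t the scatter of the current minibatch's derivatives the estimate updates approximately as

    F_t = (1 - eta) * F_{t-1} + eta * S_t

Raising num-minibatches-history to 40 averages over more minibatches, which is what "change less fast" refers to above.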

# local/nnet3/compare.sh exp/resnet1b_cifar10 exp/resnet1c_cifar10
# System resnet1b_cifar10 resnet1c_cifar10
# final test accuracy: 0.9481 0.9514
# final train accuracy: 0.9996 1
# final test objf: -0.163336 -0.157244
# final train objf: -0.00788341 -0.00751868
# num-parameters: 1322730 1322730

# local/nnet3/compare.sh exp/resnet1b_cifar100 exp/resnet1c_cifar100
# System resnet1b_cifar100 resnet1c_cifar100
# final test accuracy: 0.7602 0.7627
# final train accuracy: 0.9598 0.96
# final test objf: -0.888699 -0.862205
# final train objf: -0.164213 -0.174973
# num-parameters: 1345860 1345860
# steps/info/nnet3_dir_info.pl exp/resnet1c_cifar10{,0}
# exp/resnet1c_cifar10: num-iters=133 nj=1..2 num-params=1.3M dim=96->10 combine=-0.02->-0.01 loglike:train/valid[87,132,final]=(-0.115,-0.034,-0.0075/-0.24,-0.21,-0.157) accuracy:train/valid[87,132,final]=(0.960,0.9888,1.0000/0.925,0.938,0.951)
# exp/resnet1c_cifar100: num-iters=133 nj=1..2 num-params=1.3M dim=96->100 combine=-0.24->-0.20 loglike:train/valid[87,132,final]=(-0.75,-0.27,-0.175/-1.20,-1.00,-0.86) accuracy:train/valid[87,132,final]=(0.78,0.923,0.960/0.67,0.73,0.76)

# Set -e here so that we catch if any executable fails immediately
set -euo pipefail



# training options
stage=0
train_stage=-10
dataset=cifar10
srand=0
reporting_email=
affix=1c


# End configuration section.
echo "$0 $@" # Print the command line for logging

. ./cmd.sh
. ./path.sh
. ./utils/parse_options.sh

if ! cuda-compiled; then
cat <<EOF && exit 1
This script is intended to be used with GPUs but you have not compiled Kaldi with CUDA
If you want to use GPUs (and have them), go to src/, and configure and make on a machine
where "nvcc" is installed.
EOF
fi



dir=exp/resnet${affix}_${dataset}

egs=exp/${dataset}_egs2

if [ ! -d $egs ]; then
echo "$0: expected directory $egs to exist. Run the get_egs.sh commands in the"
echo " run.sh before this script."
exit 1
fi

# check that the expected files are in the egs directory.

for f in $egs/egs.1.ark $egs/train_diagnostic.egs $egs/valid_diagnostic.egs $egs/combine.egs \
$egs/info/feat_dim $egs/info/left_context $egs/info/right_context \
$egs/info/output_dim; do
if [ ! -e $f ]; then
echo "$0: expected file $f to exist."
exit 1;
fi
done


mkdir -p $dir/log


if [ $stage -le 1 ]; then
mkdir -p $dir
echo "$0: creating neural net configs using the xconfig parser";

num_targets=$(cat $egs/info/output_dim)

# Note: we hardcode in the CNN config that we are dealing with 32x32x3 color
# images.


nf1=48
nf2=96
nf3=256
nb3=128

a="num-minibatches-history=40.0"
common="$a required-time-offsets=0 height-offsets=-1,0,1"
res_opts="$a bypass-source=batchnorm"

mkdir -p $dir/configs
cat <<EOF > $dir/configs/network.xconfig
input dim=96 name=input
conv-layer name=conv1 $a height-in=32 height-out=32 time-offsets=-1,0,1 required-time-offsets=0 height-offsets=-1,0,1 num-filters-out=$nf1
res-block name=res2 num-filters=$nf1 height=32 time-period=1 $res_opts
res-block name=res3 num-filters=$nf1 height=32 time-period=1 $res_opts
conv-layer name=conv4 height-in=32 height-out=16 height-subsample-out=2 time-offsets=-1,0,1 $common num-filters-out=$nf2
res-block name=res5 num-filters=$nf2 height=16 time-period=2 $res_opts
res-block name=res6 num-filters=$nf2 height=16 time-period=2 $res_opts
conv-layer name=conv7 height-in=16 height-out=8 height-subsample-out=2 time-offsets=-2,0,2 $common num-filters-out=$nf3
res-block name=res8 num-filters=$nf3 num-bottleneck-filters=$nb3 height=8 time-period=4 $res_opts
res-block name=res9 num-filters=$nf3 num-bottleneck-filters=$nb3 height=8 time-period=4 $res_opts
res-block name=res10 num-filters=$nf3 num-bottleneck-filters=$nb3 height=8 time-period=4 $res_opts
channel-average-layer name=channel-average input=Append(2,6,10,14,18,22,24,28) dim=$nf3
output-layer name=output learning-rate-factor=0.1 dim=$num_targets
EOF
steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig --config-dir $dir/configs/
fi


if [ $stage -le 2 ]; then

steps/nnet3/train_raw_dnn.py --stage=$train_stage \
--cmd="$train_cmd" \
--image.augmentation-opts="--horizontal-flip-prob=0.5 --horizontal-shift=0.1 --vertical-shift=0.1 --num-channels=3" \
--trainer.srand=$srand \
--trainer.max-param-change=2.0 \
--trainer.num-epochs=100 \
--egs.frames-per-eg=1 \
--trainer.optimization.num-jobs-initial=1 \
--trainer.optimization.num-jobs-final=2 \
--trainer.optimization.initial-effective-lrate=0.003 \
--trainer.optimization.final-effective-lrate=0.0003 \
--trainer.optimization.minibatch-size=256,128,64 \
--trainer.optimization.proportional-shrink=50.0 \
--trainer.shuffle-buffer-size=2000 \
--egs.dir="$egs" \
--use-gpu=true \
--reporting.email="$reporting_email" \
--dir=$dir || exit 1;
fi


exit 0;
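Aside (our understanding of the trainer, hedged; not text from the PR): proportional-shrink scales the parameters each iteration by approximately 1 - proportional_shrink * effective_lrate. A rough sanity check for the values used here:

awk 'BEGIN { ps=50.0; printf("start: %g  end: %g\n", 1-ps*0.003, 1-ps*0.0003) }'
# start: 0.85  end: 0.985

i.e. strong shrinkage early in training, tapering off as the learning rate decays.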
5 changes: 3 additions & 2 deletions egs/svhn/v1/local/nnet3/run_cnn_aug_1a.sh
@@ -2,8 +2,9 @@

# nnet topology similar to 1a but bigger and with more epochs and data augmentation (improved 95 --> 97)

-# steps/info/nnet3_dir_info.pl exp/cnn_aug1a:
-# exp/cnn_aug1a: num-iters=300 nj=1..2 num-params=2.8M dim=96->10 combine=-0.02->-0.02 loglike:train/valid[199,299,final]=(-0.01,-0.00,-0.00/-0.17,-0.17,-0.17) accuracy:train/valid[199,299,final]=(1.00,1.00,1.00/0.97,0.97,0.97)
+# steps/info/nnet3_dir_info.pl exp/cnn_aug1a
+# exp/cnn_aug1a: num-iters=130 nj=2..4 num-params=2.8M dim=96->10 combine=-0.07->-0.06 loglike:train/valid[85,129,final]=(-0.090,-0.060,-0.054/-0.163,-0.110,-0.102) accuracy:train/valid[85,129,final]=(0.9764,0.9868,0.9886/0.958,0.9731,0.9762)

# Set -e here so that we catch if any executable fails immediately
set -euo pipefail