diff --git a/egs/ami/s5b/local/chain/tuning/run_tdnn_1f.sh b/egs/ami/s5b/local/chain/tuning/run_tdnn_1f.sh index afdaa347055..8bf2b73dada 100644 --- a/egs/ami/s5b/local/chain/tuning/run_tdnn_1f.sh +++ b/egs/ami/s5b/local/chain/tuning/run_tdnn_1f.sh @@ -3,6 +3,10 @@ # same as 1e but uses batchnorm components instead of renorm also adding # proportional-shrink 10, trained with 4 epochs + +# local/chain/tuning/run_tdnn_1f.sh --mic sdm1 --use-ihm-ali true --train-set train_cleaned --gmm tri3_cleaned + +# local/chain/compare_wer_general.sh sdm1 tdnn1e_sp_bi_ihmali tdnn1f_sp_bi_ihmali # System tdnn1e_sp_bi_ihmali tdnn1f_sp_bi_ihmali # WER on dev 39.2 37.5 # WER on eval 42.8 41.3 diff --git a/egs/ami/s5b/local/chain/tuning/run_tdnn_1g.sh b/egs/ami/s5b/local/chain/tuning/run_tdnn_1g.sh index 5900296b671..dfb6dfedee7 100644 --- a/egs/ami/s5b/local/chain/tuning/run_tdnn_1g.sh +++ b/egs/ami/s5b/local/chain/tuning/run_tdnn_1g.sh @@ -3,6 +3,10 @@ # same as 1e but uses batchnorm components instead of renorm also adding # proportional-shrink 10, trained with 6 epochs + +# local/chain/tuning/run_tdnn_1g.sh --mic sdm1 --use-ihm-ali true --train-set train_cleaned --gmm tri3_cleaned + +# local/chain/compare_wer_general.sh sdm1 tdnn1e_sp_bi_ihmali tdnn1g_sp_bi_ihmali # System tdnn1e_sp_bi_ihmali tdnn1g_sp_bi_ihmali # WER on dev 39.2 36.9 # WER on eval 42.8 41.0 diff --git a/egs/ami/s5b/local/chain/tuning/run_tdnn_lstm_1n.sh b/egs/ami/s5b/local/chain/tuning/run_tdnn_lstm_1n.sh index 6cdca0d222b..bee4d997b01 100644 --- a/egs/ami/s5b/local/chain/tuning/run_tdnn_lstm_1n.sh +++ b/egs/ami/s5b/local/chain/tuning/run_tdnn_lstm_1n.sh @@ -4,13 +4,16 @@ # and using proportional-shrink with value 10, this model uses # 5 epochs to train +# local/chain/tuning/run_tdnn_lstm_1n.sh --mic sdm1 --use-ihm-ali true --train-set train_cleaned --gmm tri3_cleaned +# local/chain/compare_wer_general.sh sdm1 tdnn_lstmli_5epoch_sp_bi_ihmali_ld5 tdnn_lstm1n_sp_bi_ihmali_ld5 # System tdnn_lstmli_5epoch_sp_bi_ihmali_ld5 tdnn_lstm1n_sp_bi_ihmali_ld5 -# WER on dev 36.9 34.6 -# WER on eval 40.4 37.9 -# Final train prob -0.0867643 -0.134102 -# Final valid prob -0.266945 -0.234435 -# Final train prob (xent) -1.22333 -1.52368 -# Final valid prob (xent) -2.13335 -2.02384 +# WER on dev 36.9 34.2 +# WER on eval 40.4 37.7 +# Final train prob -0.0867643 -0.132712 +# Final valid prob -0.266945 -0.234348 +# Final train prob (xent) -1.22333 -1.5112 +# Final valid prob (xent) -2.13335 -2.01698 + set -e -o pipefail @@ -188,19 +191,19 @@ if [ $stage -le 15 ]; then fixed-affine-layer name=lda input=Append(-1,0,1,ReplaceIndex(ivector, t, 0)) affine-transform-file=$dir/configs/lda.mat # the first splicing is moved before the lda layer, so no splicing here - relu-renorm-layer name=tdnn1 dim=1024 - relu-renorm-layer name=tdnn2 input=Append(-1,0,1) dim=1024 - relu-renorm-layer name=tdnn3 input=Append(-1,0,1) dim=1024 + relu-batchnorm-layer name=tdnn1 dim=1024 + relu-batchnorm-layer name=tdnn2 input=Append(-1,0,1) dim=1024 + relu-batchnorm-layer name=tdnn3 input=Append(-1,0,1) dim=1024 # check steps/libs/nnet3/xconfig/lstm.py for the other options and defaults lstmp-layer name=lstm1 cell-dim=1024 recurrent-projection-dim=256 non-recurrent-projection-dim=256 delay=-3 - relu-renorm-layer name=tdnn4 input=Append(-3,0,3) dim=1024 - relu-renorm-layer name=tdnn5 input=Append(-3,0,3) dim=1024 - relu-renorm-layer name=tdnn6 input=Append(-3,0,3) dim=1024 + relu-batchnorm-layer name=tdnn4 input=Append(-3,0,3) dim=1024 + relu-batchnorm-layer name=tdnn5 input=Append(-3,0,3) 
dim=1024 + relu-batchnorm-layer name=tdnn6 input=Append(-3,0,3) dim=1024 lstmp-layer name=lstm2 cell-dim=1024 recurrent-projection-dim=256 non-recurrent-projection-dim=256 delay=-3 - relu-renorm-layer name=tdnn7 input=Append(-3,0,3) dim=1024 - relu-renorm-layer name=tdnn8 input=Append(-3,0,3) dim=1024 - relu-renorm-layer name=tdnn9 input=Append(-3,0,3) dim=1024 + relu-batchnorm-layer name=tdnn7 input=Append(-3,0,3) dim=1024 + relu-batchnorm-layer name=tdnn8 input=Append(-3,0,3) dim=1024 + relu-batchnorm-layer name=tdnn9 input=Append(-3,0,3) dim=1024 lstmp-layer name=lstm3 cell-dim=1024 recurrent-projection-dim=256 non-recurrent-projection-dim=256 delay=-3 ## adding the layers for chain branch diff --git a/egs/cifar/v1/local/nnet3/run_cnn_aug_1d.sh b/egs/cifar/v1/local/nnet3/run_cnn_aug_1d.sh new file mode 100755 index 00000000000..1eb448149ba --- /dev/null +++ b/egs/cifar/v1/local/nnet3/run_cnn_aug_1d.sh @@ -0,0 +1,134 @@ +#!/bin/bash + + +# 1d is as 1c but setting num-minibatches-history=40. +# A bit better. + +# local/nnet3/compare.sh exp/cnn_aug_1c_cifar10 exp/cnn_aug_1d_cifar10 +# System cnn_aug_1c_cifar10 cnn_aug_1d_cifar10 +# final test accuracy: 0.8834 0.8857 +# final train accuracy: 0.9644 0.9626 +# final test objf: -0.362241 -0.356861 +# final train objf: -0.114712 -0.114144 +# num-parameters: 2205290 2205290 + +# local/nnet3/compare.sh exp/cnn_aug_1c_cifar100 exp/cnn_aug_1d_cifar100 +# System cnn_aug_1c_cifar100 cnn_aug_1d_cifar100 +# final test accuracy: 0.6219 0.6237 +# final train accuracy: 0.8634 0.8688 +# final test objf: -1.42399 -1.40784 +# final train objf: -0.493349 -0.482047 +# num-parameters: 2251460 2251460 + + +# steps/info/nnet3_dir_info.pl exp/cnn_aug_1d_cifar10{,0} +# exp/cnn_aug_1d_cifar10: num-iters=200 nj=1..2 num-params=2.2M dim=96->10 combine=-0.24->-0.23 loglike:train/valid[132,199,final]=(-0.172,-0.114,-0.114/-0.38,-0.36,-0.36) accuracy:train/valid[132,199,final]=(0.938,0.963,0.963/0.879,0.887,0.886) +# exp/cnn_aug_1d_cifar100: num-iters=200 nj=1..2 num-params=2.3M dim=96->100 combine=-0.90->-0.92 loglike:train/valid[132,199,final]=(-0.63,-0.48,-0.48/-1.43,-1.41,-1.41) accuracy:train/valid[132,199,final]=(0.821,0.868,0.869/0.61,0.62,0.62) + +# Set -e here so that we catch if any executable fails immediately +set -euo pipefail + + + +# training options +stage=0 +train_stage=-10 +dataset=cifar10 +srand=0 +reporting_email= +affix=_aug_1d + + +# End configuration section. +echo "$0 $@" # Print the command line for logging + +. ./cmd.sh +. ./path.sh +. ./utils/parse_options.sh + +if ! 
cuda-compiled; then + cat < $dir/configs/network.xconfig + input dim=96 name=input + conv-relu-batchnorm-layer name=cnn1 height-in=32 height-out=32 time-offsets=-1,0,1 $common1 + conv-relu-batchnorm-dropout-layer name=cnn2 height-in=32 height-out=16 time-offsets=-1,0,1 dropout-proportion=0.25 $common1 height-subsample-out=2 + conv-relu-batchnorm-layer name=cnn3 height-in=16 height-out=16 time-offsets=-2,0,2 $common2 + conv-relu-batchnorm-dropout-layer name=cnn4 height-in=16 height-out=8 time-offsets=-2,0,2 dropout-proportion=0.25 $common2 height-subsample-out=2 + conv-relu-batchnorm-layer name=cnn5 height-in=8 height-out=8 time-offsets=-4,0,4 $common2 + relu-dropout-layer name=fully_connected1 input=Append(2,6,10,14,18,22,26,30) dropout-proportion=0.5 dim=512 + output-layer name=output dim=$num_targets +EOF + steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig --config-dir $dir/configs/ +fi + + +if [ $stage -le 2 ]; then + + steps/nnet3/train_raw_dnn.py --stage=$train_stage \ + --cmd="$train_cmd" \ + --image.augmentation-opts="--horizontal-flip-prob=0.5 --horizontal-shift=0.1 --vertical-shift=0.1 --num-channels=3" \ + --trainer.srand=$srand \ + --trainer.max-param-change=2.0 \ + --trainer.num-epochs=100 \ + --egs.frames-per-eg=1 \ + --trainer.optimization.num-jobs-initial=1 \ + --trainer.optimization.num-jobs-final=2 \ + --trainer.optimization.initial-effective-lrate=0.003 \ + --trainer.optimization.final-effective-lrate=0.0001 \ + --trainer.optimization.minibatch-size=256,128,64 \ + --trainer.shuffle-buffer-size=2000 \ + --egs.dir="$egs" \ + --use-gpu=true \ + --reporting.email="$reporting_email" \ + --dir=$dir || exit 1; +fi + + +exit 0; diff --git a/egs/cifar/v1/local/nnet3/run_resnet_1c.sh b/egs/cifar/v1/local/nnet3/run_resnet_1c.sh new file mode 100755 index 00000000000..0708b3d6eaa --- /dev/null +++ b/egs/cifar/v1/local/nnet3/run_resnet_1c.sh @@ -0,0 +1,144 @@ +#!/bin/bash + +# 1c is as 1b but setting num-minibatches-history=40.0 in the configs, +# so the Fisher matrix estimates change less fast. +# Seems to be helpfu. + +# local/nnet3/compare.sh exp/resnet1b_cifar10 exp/resnet1c_cifar10 +# System resnet1b_cifar10 resnet1c_cifar10 +# final test accuracy: 0.9481 0.9514 +# final train accuracy: 0.9996 1 +# final test objf: -0.163336 -0.157244 +# final train objf: -0.00788341 -0.00751868 +# num-parameters: 1322730 1322730 + +# local/nnet3/compare.sh exp/resnet1b_cifar100 exp/resnet1c_cifar100 +# System resnet1b_cifar100 resnet1c_cifar100 +# final test accuracy: 0.7602 0.7627 +# final train accuracy: 0.9598 0.96 +# final test objf: -0.888699 -0.862205 +# final train objf: -0.164213 -0.174973 +# num-parameters: 1345860 1345860 +# steps/info/nnet3_dir_info.pl exp/resnet1c_cifar10{,0} +# exp/resnet1c_cifar10: num-iters=133 nj=1..2 num-params=1.3M dim=96->10 combine=-0.02->-0.01 loglike:train/valid[87,132,final]=(-0.115,-0.034,-0.0075/-0.24,-0.21,-0.157) accuracy:train/valid[87,132,final]=(0.960,0.9888,1.0000/0.925,0.938,0.951) +# exp/resnet1c_cifar100: num-iters=133 nj=1..2 num-params=1.3M dim=96->100 combine=-0.24->-0.20 loglike:train/valid[87,132,final]=(-0.75,-0.27,-0.175/-1.20,-1.00,-0.86) accuracy:train/valid[87,132,final]=(0.78,0.923,0.960/0.67,0.73,0.76) + +# Set -e here so that we catch if any executable fails immediately +set -euo pipefail + + + +# training options +stage=0 +train_stage=-10 +dataset=cifar10 +srand=0 +reporting_email= +affix=1c + + +# End configuration section. +echo "$0 $@" # Print the command line for logging + +. ./cmd.sh +. ./path.sh +. 
./utils/parse_options.sh + +if ! cuda-compiled; then + cat < $dir/configs/network.xconfig + input dim=96 name=input + conv-layer name=conv1 $a height-in=32 height-out=32 time-offsets=-1,0,1 required-time-offsets=0 height-offsets=-1,0,1 num-filters-out=$nf1 + res-block name=res2 num-filters=$nf1 height=32 time-period=1 $res_opts + res-block name=res3 num-filters=$nf1 height=32 time-period=1 $res_opts + conv-layer name=conv4 height-in=32 height-out=16 height-subsample-out=2 time-offsets=-1,0,1 $common num-filters-out=$nf2 + res-block name=res5 num-filters=$nf2 height=16 time-period=2 $res_opts + res-block name=res6 num-filters=$nf2 height=16 time-period=2 $res_opts + conv-layer name=conv7 height-in=16 height-out=8 height-subsample-out=2 time-offsets=-2,0,2 $common num-filters-out=$nf3 + res-block name=res8 num-filters=$nf3 num-bottleneck-filters=$nb3 height=8 time-period=4 $res_opts + res-block name=res9 num-filters=$nf3 num-bottleneck-filters=$nb3 height=8 time-period=4 $res_opts + res-block name=res10 num-filters=$nf3 num-bottleneck-filters=$nb3 height=8 time-period=4 $res_opts + channel-average-layer name=channel-average input=Append(2,6,10,14,18,22,24,28) dim=$nf3 + output-layer name=output learning-rate-factor=0.1 dim=$num_targets +EOF + steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig --config-dir $dir/configs/ +fi + + +if [ $stage -le 2 ]; then + + steps/nnet3/train_raw_dnn.py --stage=$train_stage \ + --cmd="$train_cmd" \ + --image.augmentation-opts="--horizontal-flip-prob=0.5 --horizontal-shift=0.1 --vertical-shift=0.1 --num-channels=3" \ + --trainer.srand=$srand \ + --trainer.max-param-change=2.0 \ + --trainer.num-epochs=100 \ + --egs.frames-per-eg=1 \ + --trainer.optimization.num-jobs-initial=1 \ + --trainer.optimization.num-jobs-final=2 \ + --trainer.optimization.initial-effective-lrate=0.003 \ + --trainer.optimization.final-effective-lrate=0.0003 \ + --trainer.optimization.minibatch-size=256,128,64 \ + --trainer.optimization.proportional-shrink=50.0 \ + --trainer.shuffle-buffer-size=2000 \ + --egs.dir="$egs" \ + --use-gpu=true \ + --reporting.email="$reporting_email" \ + --dir=$dir || exit 1; +fi + + +exit 0; diff --git a/egs/svhn/v1/local/nnet3/run_cnn_aug_1a.sh b/egs/svhn/v1/local/nnet3/run_cnn_aug_1a.sh index 96e7254474a..e89ff125102 100755 --- a/egs/svhn/v1/local/nnet3/run_cnn_aug_1a.sh +++ b/egs/svhn/v1/local/nnet3/run_cnn_aug_1a.sh @@ -2,8 +2,9 @@ # nnet topology similar to 1a but bigger and with more epochs and data augmentation (improved 95 --> 97) -# steps/info/nnet3_dir_info.pl exp/cnn_aug1a: -# exp/cnn_aug1a: num-iters=300 nj=1..2 num-params=2.8M dim=96->10 combine=-0.02->-0.02 loglike:train/valid[199,299,final]=(-0.01,-0.00,-0.00/-0.17,-0.17,-0.17) accuracy:train/valid[199,299,final]=(1.00,1.00,1.00/0.97,0.97,0.97) + +# steps/info/nnet3_dir_info.pl exp/cnn_aug1a +# exp/cnn_aug1a: num-iters=130 nj=2..4 num-params=2.8M dim=96->10 combine=-0.07->-0.06 loglike:train/valid[85,129,final]=(-0.090,-0.060,-0.054/-0.163,-0.110,-0.102) accuracy:train/valid[85,129,final]=(0.9764,0.9868,0.9886/0.958,0.9731,0.9762) # Set -e here so that we catch if any executable fails immediately set -euo pipefail diff --git a/egs/svhn/v1/local/nnet3/run_cnn_aug_1b.sh b/egs/svhn/v1/local/nnet3/run_cnn_aug_1b.sh new file mode 100755 index 00000000000..cf2f92590d2 --- /dev/null +++ b/egs/svhn/v1/local/nnet3/run_cnn_aug_1b.sh @@ -0,0 +1,120 @@ +#!/bin/bash + +# run_cnn_aug_1b.sh is like run_cnn_aug_1a.sh but setting +# num-minibatches-history=40.0 (longer history for 
natural gradient), +# and using the "egs2" examples with more archives, which necessitates +# adjusting the proportional-shrink option (since it should be +# proportional to archive size). + +# result improves 97.62 -> 97.71. + +# steps/info/nnet3_dir_info.pl exp/cnn_aug1b +# exp/cnn_aug1b: num-iters=180 nj=2..4 num-params=2.8M dim=96->10 combine=-0.06->-0.06 loglike:train/valid[119,179,final]=(-0.066,-0.051,-0.049/-0.126,-0.103,-0.100) accuracy:train/valid[119,179,final]=(0.9846,0.9890,0.9900/0.970,0.9760,0.9771) + +# Set -e here so that we catch if any executable fails immediately +set -euo pipefail + + + +# training options +stage=0 +train_stage=-10 +srand=0 +reporting_email= +affix=_aug1b + + +# End configuration section. +echo "$0 $@" # Print the command line for logging + +. ./cmd.sh +. ./path.sh +. ./utils/parse_options.sh + +if ! cuda-compiled; then + cat < $dir/configs/network.xconfig + input dim=96 name=input + conv-relu-batchnorm-layer name=cnn1 height-in=32 height-out=32 time-offsets=-1,0,1 $common1 + conv-relu-batchnorm-dropout-layer name=cnn2 height-in=32 height-out=16 time-offsets=-1,0,1 dropout-proportion=0.25 $common1 height-subsample-out=2 + conv-relu-batchnorm-layer name=cnn3 height-in=16 height-out=16 time-offsets=-2,0,2 $common2 + conv-relu-batchnorm-dropout-layer name=cnn4 height-in=16 height-out=8 time-offsets=-2,0,2 dropout-proportion=0.25 $common2 height-subsample-out=2 + conv-relu-batchnorm-layer name=cnn5 height-in=8 height-out=8 time-offsets=-4,0,4 $common2 + relu-dropout-layer name=fully_connected1 input=Append(2,6,10,14,18,22,26,30) dropout-proportion=0.5 dim=512 + output-layer name=output dim=$num_targets +EOF + steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig --config-dir $dir/configs/ +fi + + +if [ $stage -le 2 ]; then + + steps/nnet3/train_raw_dnn.py --stage=$train_stage \ + --cmd="$cmd" \ + --image.augmentation-opts="--horizontal-shift=0.04 --vertical-shift=0.08 --num-channels=3" \ + --trainer.srand=$srand \ + --trainer.max-param-change=2.0 \ + --trainer.num-epochs=30 \ + --egs.frames-per-eg=1 \ + --trainer.optimization.num-jobs-initial=2 \ + --trainer.optimization.num-jobs-final=4 \ + --trainer.optimization.initial-effective-lrate=0.003 \ + --trainer.optimization.final-effective-lrate=0.0003 \ + --trainer.optimization.minibatch-size=256,128,64 \ + --trainer.optimization.proportional-shrink=18.0 \ + --trainer.shuffle-buffer-size=2000 \ + --egs.dir="$egs" \ + --use-gpu=true \ + --reporting.email="$reporting_email" \ + --dir=$dir || exit 1; +fi + + +exit 0; diff --git a/egs/svhn/v1/local/nnet3/run_resnet_1b.sh b/egs/svhn/v1/local/nnet3/run_resnet_1b.sh index 7e6ab60eae3..7f0540e90fe 100755 --- a/egs/svhn/v1/local/nnet3/run_resnet_1b.sh +++ b/egs/svhn/v1/local/nnet3/run_resnet_1b.sh @@ -1,6 +1,6 @@ #!/bin/bash -# exp/resnet1b: num-iters=130 nj=2..4 num-params=1.3M dim=96->10 combine=-0.04->-0.04 loglike:train/valid[85,129,final]=(-0.055,-0.041,-0.035/-0.097,-0.079,-0.074) accuracy:train/valid[85,129,final]=(0.9882,0.9924,0.9946/0.977,0.9817,0.9840) +# exp/resnet1b: num-iters=130 nj=2..4 num-params=1.3M dim=96->10 combine=-0.04->-0.04 loglike:train/valid[85,129,final]=(-0.049,-0.044,-0.036/-0.098,-0.085,-0.076) accuracy:train/valid[85,129,final]=(0.9904,0.9908,0.9940/0.9764,0.9804,0.9831) # This setup is based on the one in cifar/v1/local/nnet3/run_resnet_1{a,b}.sh. 
# We are reducing the number of epochs quite a bit, since there is so much @@ -107,7 +107,7 @@ fi if [ $stage -le 2 ]; then steps/nnet3/train_raw_dnn.py --stage=$train_stage \ - --cmd="$train_cmd" \ + --cmd="$cmd" \ --image.augmentation-opts="--horizontal-shift=0.04 --vertical-shift=0.08 --num-channels=3" \ --trainer.srand=$srand \ --trainer.max-param-change=2.0 \ diff --git a/egs/svhn/v1/local/nnet3/run_resnet_1c.sh b/egs/svhn/v1/local/nnet3/run_resnet_1c.sh new file mode 100755 index 00000000000..b56ee62b806 --- /dev/null +++ b/egs/svhn/v1/local/nnet3/run_resnet_1c.sh @@ -0,0 +1,133 @@ +#!/bin/bash + +# resnet1c is as resnet1b but adding "num-minibatches-history=40.0" to +# all layers to increase the history size of natural gradient +# (improves optimization), and using the "egs2" egs with more, +# smaller archives. Also changing the proportional-shrink option +# to compensate for the change in archive size (it should vary +# proportionally to the number of egs in the archive). + +# improves 98.31 -> 98.45. + +# exp/resnet1c: num-iters=180 nj=2..4 num-params=1.3M dim=96->10 combine=-0.04->-0.03 loglike:train/valid[119,179,final]=(-0.047,-0.041,-0.034/-0.083,-0.075,-0.071) accuracy:train/valid[119,179,final]=(0.9914,0.9922,0.9944/0.9803,0.9826,0.9845) + + + +# Set -e here so that we catch if any executable fails immediately +set -euo pipefail + + + +# training options +stage=0 +train_stage=-10 +srand=0 +reporting_email= +affix=1b5 + + +# End configuration section. +echo "$0 $@" # Print the command line for logging + +. ./cmd.sh +. ./path.sh +. ./utils/parse_options.sh + +if ! cuda-compiled; then + cat < $dir/configs/network.xconfig + input dim=96 name=input + conv-layer name=conv1 $a height-in=32 height-out=32 time-offsets=-1,0,1 required-time-offsets=0 height-offsets=-1,0,1 num-filters-out=$nf1 + res-block name=res2 num-filters=$nf1 height=32 time-period=1 $res_opts + res-block name=res3 num-filters=$nf1 height=32 time-period=1 $res_opts + conv-layer name=conv4 height-in=32 height-out=16 height-subsample-out=2 time-offsets=-1,0,1 $common num-filters-out=$nf2 + res-block name=res5 num-filters=$nf2 height=16 time-period=2 $res_opts + res-block name=res6 num-filters=$nf2 height=16 time-period=2 $res_opts + conv-layer name=conv7 height-in=16 height-out=8 height-subsample-out=2 time-offsets=-2,0,2 $common num-filters-out=$nf3 + res-block name=res8 num-filters=$nf3 num-bottleneck-filters=$nb3 height=8 time-period=4 $res_opts + res-block name=res9 num-filters=$nf3 num-bottleneck-filters=$nb3 height=8 time-period=4 $res_opts + res-block name=res10 num-filters=$nf3 num-bottleneck-filters=$nb3 height=8 time-period=4 $res_opts + channel-average-layer name=channel-average input=Append(2,6,10,14,18,22,24,28) dim=$nf3 + output-layer name=output learning-rate-factor=0.1 dim=$num_targets +EOF + steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig --config-dir $dir/configs/ +fi + + +if [ $stage -le 2 ]; then + + steps/nnet3/train_raw_dnn.py --stage=$train_stage \ + --cmd="$cmd" \ + --image.augmentation-opts="--horizontal-shift=0.04 --vertical-shift=0.08 --num-channels=3" \ + --trainer.srand=$srand \ + --trainer.max-param-change=2.0 \ + --trainer.num-epochs=30 \ + --egs.frames-per-eg=1 \ + --trainer.optimization.num-jobs-initial=2 \ + --trainer.optimization.num-jobs-final=4 \ + --trainer.optimization.initial-effective-lrate=0.003 \ + --trainer.optimization.final-effective-lrate=0.0003 \ + --trainer.optimization.minibatch-size=256,128,64 \ + 
--trainer.optimization.proportional-shrink=18.0 \ + --trainer.shuffle-buffer-size=2000 \ + --egs.dir="$egs" \ + --use-gpu=true \ + --reporting.email="$reporting_email" \ + --dir=$dir || exit 1; +fi + + +exit 0; diff --git a/egs/svhn/v1/run.sh b/egs/svhn/v1/run.sh index fc2e2ef7733..720f4a13e29 100755 --- a/egs/svhn/v1/run.sh +++ b/egs/svhn/v1/run.sh @@ -19,3 +19,9 @@ if [ $stage -le 1 ]; then # egs preparation image/nnet3/get_egs.sh --egs-per-archive 50000 --cmd "$cmd" data/train_all data/test exp/egs fi + +if [ $stage -le 2 ]; then + # Making a version of the egs that have more archives with fewer egs each (this seems to + # slightly improve results). Eventually we'll disable the creation of the egs above. + image/nnet3/get_egs.sh --egs-per-archive 35000 --cmd "$cmd" data/train_all data/test exp/egs2 +fi diff --git a/egs/tedlium/s5_r2/local/chain/tuning/run_tdnn_1e.sh b/egs/tedlium/s5_r2/local/chain/tuning/run_tdnn_1e.sh index 08eeba59c3d..4c578c20ad1 100755 --- a/egs/tedlium/s5_r2/local/chain/tuning/run_tdnn_1e.sh +++ b/egs/tedlium/s5_r2/local/chain/tuning/run_tdnn_1e.sh @@ -3,8 +3,8 @@ # run_tdnn_1e.sh is like run_tdnn_1d.sh but batchnorm components instead of renorm -exp/chain_cleaned/tdnn1d_sp_bi: num-iters=253 nj=2..12 num-params=7.0M dim=40+100->3597 combine=-0.098->-0.097 xent:train/valid[167,252,final]=(-1.40,-1.34,-1.34/-1.50,-1.46,-1.46) logprob:train/valid[167,252,final]=(-0.091,-0.083,-0.083/-0.104,-0.101,-0.101) -exp/chain_cleaned/tdnn1e_sp_bi/: num-iters=253 nj=2..12 num-params=7.0M dim=40+100->3597 combine=-0.095->-0.095 xent:train/valid[167,252,final]=(-1.37,-1.31,-1.31/-1.47,-1.44,-1.44) logprob:train/valid[167,252,final]=(-0.087,-0.078,-0.078/-0.102,-0.099,-0.099) +# exp/chain_cleaned/tdnn1d_sp_bi: num-iters=253 nj=2..12 num-params=7.0M dim=40+100->3597 combine=-0.098->-0.097 xent:train/valid[167,252,final]=(-1.40,-1.34,-1.34/-1.50,-1.46,-1.46) logprob:train/valid[167,252,final]=(-0.091,-0.083,-0.083/-0.104,-0.101,-0.101) +# exp/chain_cleaned/tdnn1e_sp_bi/: num-iters=253 nj=2..12 num-params=7.0M dim=40+100->3597 combine=-0.095->-0.095 xent:train/valid[167,252,final]=(-1.37,-1.31,-1.31/-1.47,-1.44,-1.44) logprob:train/valid[167,252,final]=(-0.087,-0.078,-0.078/-0.102,-0.099,-0.099) # local/chain/compare_wer_general.sh exp/chain_cleaned/tdnn1d_sp_bi exp/chain_cleaned/tdnn1e_sp_bi # System tdnn1d_sp_bi tdnn1e_sp_bi @@ -49,7 +49,7 @@ nnet3_affix=_cleaned # cleanup affix for nnet3 and chain dirs, e.g. _cleaned # are just hardcoded at this level, in the commands below. train_stage=-10 tree_affix= # affix for tree directory, e.g. "a" or "b", in case we change the configuration. -tdnn_affix=1d #affix for TDNN directory, e.g. "a" or "b", in case we change the configuration. +tdnn_affix=1e #affix for TDNN directory, e.g. "a" or "b", in case we change the configuration. common_egs_dir= # you can set this to use previously dumped egs. # End configuration section. 
diff --git a/egs/wsj/s5/steps/info/nnet3_dir_info.pl b/egs/wsj/s5/steps/info/nnet3_dir_info.pl index ad4a86e4afd..06d07a63755 100755 --- a/egs/wsj/s5/steps/info/nnet3_dir_info.pl +++ b/egs/wsj/s5/steps/info/nnet3_dir_info.pl @@ -146,9 +146,9 @@ sub get_combine_info { sub number_to_string { my ($value, $name) = @_; my $precision; - if (abs($value) < 0.02 or ($name eq "accuracy" and abs($value) > 0.98)) { + if (abs($value) < 0.02 or ($name eq "accuracy" and abs($value) > 0.97)) { $precision = 4; - } elsif (abs($value) < 0.2 or ($name eq "accuracy" and abs($value) > 0.8)) { + } elsif (abs($value) < 0.2 or ($name eq "accuracy" and abs($value) > 0.7)) { $precision = 3; } else { $precision = 2; diff --git a/src/bin/ali-to-post.cc b/src/bin/ali-to-post.cc index 589d9d64afe..ac87d676c06 100644 --- a/src/bin/ali-to-post.cc +++ b/src/bin/ali-to-post.cc @@ -33,10 +33,18 @@ int main(int argc, char *argv[]) { typedef kaldi::int32 int32; try { const char *usage = - "Convert alignments to posteriors\n" + "Convert alignments to posteriors. This is simply a format change\n" + "from integer vectors to Posteriors, which are vectors of lists of\n" + "pairs (int, float) where the float represents the posterior. The\n" + "floats would all be 1.0 in this case.\n" + "The posteriors will still be in terms of whatever integer index\n" + "the input contained, which will be transition-ids if they came\n" + "directly from decoding, or pdf-ids if they were processed by\n" + "ali-to-post.\n" "Usage: ali-to-post [options] \n" "e.g.:\n" - " ali-to-post ark:1.ali ark:1.post\n"; + " ali-to-post ark:1.ali ark:1.post\n" + "See also: ali-to-pdf, ali-to-phones, show-alignments, post-to-weights\n"; ParseOptions po(usage); @@ -69,5 +77,3 @@ int main(int argc, char *argv[]) { return -1; } } - - diff --git a/src/bin/analyze-counts.cc b/src/bin/analyze-counts.cc index 80d43891696..6eeb90d30df 100644 --- a/src/bin/analyze-counts.cc +++ b/src/bin/analyze-counts.cc @@ -43,8 +43,9 @@ int main(int argc, char *argv[]) { "e.g.: \n" " analyze-counts ark:1.ali prior.counts\n" " Show phone counts by:\n" - " ali-to-phone --per-frame=true ark:1.ali ark:- |" - " analyze-counts --verbose=1 ark:- - >/dev/null\n"; + " ali-to-phones --per-frame=true ark:1.ali ark:- |" + " analyze-counts --verbose=1 ark:- - >/dev/null\n" + "Note: this is deprecated, see post-to-tacc.\n"; ParseOptions po(usage); diff --git a/src/configure b/src/configure index 4bfe6bc8470..1d062feccc2 100755 --- a/src/configure +++ b/src/configure @@ -663,7 +663,7 @@ function linux_configure_static { done if [ "$ATLASLIBS" == "" ]; then echo Could not find any libraries $ATLASLIBDIR/{liblapack,liblapack_atlas,libclapack} that seem to be an ATLAS CLAPACK library. 
- return ; + return 1; fi for x in lib${pt}cblas.a libatlas.a lib${pt}f77blas.a; do diff --git a/src/gst-plugin/Makefile b/src/gst-plugin/Makefile index e9dec8f78fe..a9b3a208ff1 100644 --- a/src/gst-plugin/Makefile +++ b/src/gst-plugin/Makefile @@ -17,7 +17,7 @@ EXTRA_LDLIBS += $(shell pkg-config --libs glib-2.0) #Kaldi shared libraries required by the GStreamer plugin EXTRA_LDLIBS += -lkaldi-online -lkaldi-lat -lkaldi-decoder -lkaldi-feat -lkaldi-transform \ -lkaldi-gmm -lkaldi-hmm \ - -lkaldi-tree -lkaldi-matrix -lkaldi-util -lkaldi-base -lkaldi-thread + -lkaldi-tree -lkaldi-matrix -lkaldi-util -lkaldi-base OBJFILES = gst-audio-source.o gst-online-gmm-decode-faster.o diff --git a/src/nnet3/nnet-compile-looped.cc b/src/nnet3/nnet-compile-looped.cc index 5701c010680..b0ca42f15ab 100644 --- a/src/nnet3/nnet-compile-looped.cc +++ b/src/nnet3/nnet-compile-looped.cc @@ -38,13 +38,25 @@ void ModifyNnetIvectorPeriod(int32 ivector_period, KALDI_ASSERT(b && "Could not parse config line."); if (config_line.FirstToken() == "component-node") { std::string whole_line = config_lines[i]; - std::string to_search_for = "ReplaceIndex(ivector, t, 0)"; + std::string to_search_for = "ReplaceIndex("; + std::string::size_type to_search_for_size = to_search_for.size(); std::string::size_type pos = whole_line.find(to_search_for); if (pos != std::string::npos) { - std::ostringstream to_replace_with; - to_replace_with << "Round(ivector, " << ivector_period << ")"; - whole_line.replace(pos, to_search_for.size(), to_replace_with.str()); - config_to_read << whole_line << "\n"; + std::string::size_type comma_pos = whole_line.find(',', pos); + if (comma_pos != std::string::npos) { + // if the line contained ReplaceIndex(ivector, t, 0), + // descriptor_name would now be 'ivector'. + std::string descriptor_name = + whole_line.substr(pos + to_search_for_size, + comma_pos - (pos + to_search_for_size)); + std::string::size_type end_pos = whole_line.find(')', pos); + std::string::size_type expr_size = end_pos + 1 - pos; + // e.g. expr_size would be strlen("ReplaceIndex(ivector, t, 0)"). + std::ostringstream to_replace_with; + to_replace_with << "Round(" << descriptor_name << ", " << ivector_period << ")"; + whole_line.replace(pos, expr_size, to_replace_with.str()); + config_to_read << whole_line << "\n"; + } } } } diff --git a/src/nnet3/nnet-compile-looped.h b/src/nnet3/nnet-compile-looped.h index f6ff47045fe..2ebb371ecc5 100644 --- a/src/nnet3/nnet-compile-looped.h +++ b/src/nnet3/nnet-compile-looped.h @@ -83,12 +83,12 @@ int32 GetChunkSize(const Nnet &nnet, We normally train neural networks that expect to see an iVector at frame zero only; this is because we train on fixed-size chunks and the iVector doesn't change that much within each chunk. However, expecting just one iVector - isn't that convenient for looped recognition because it changes with - time, so we modify the iVector input period in the network by replacing - expressions like ReplaceIndex(ivector, t, 0) or just "t", with - Round(ivector, 10) [assuming ivector_period == 10]. This won't work - in every conceivable network, but it does do what you want in the - cases of interest. + isn't that convenient for looped recognition because it changes with time, so + we modify the iVector input period in the network by replacing expressions + like ReplaceIndex(ivector, t, 0) with Round(ivector, 10) [assuming + ivector_period == 10]. The descriptor doesn't have to be named "ivector", it + would work for ReplaceIndex(foo, t, 0). 
This won't work in every conceivable + network, but it does do what you want in the cases of interest. It does this in a rather simple way, by getting the config lines that correspond to descriptors, and doing a search-and-replace. It's diff --git a/src/nnet3/nnet-optimize-utils.cc b/src/nnet3/nnet-optimize-utils.cc index 38e2559ac62..4ad2543c602 100644 --- a/src/nnet3/nnet-optimize-utils.cc +++ b/src/nnet3/nnet-optimize-utils.cc @@ -3339,7 +3339,7 @@ class ComputationLoopedOptimizer { // Given a vector of lists, one list for each segment, of the active matrices // at the end of that segment, this function converts those lists into a - // different representation where each matrix is reprented as a pair instead + // different representation where each matrix is represented as a pair instead // of as a single int32. 'active_pairs' will have the same dimensions as // 'active_matrices'. static void ConvertListsToPairLists( @@ -3347,16 +3347,19 @@ class ComputationLoopedOptimizer { const std::vector > &matrix_to_pair, std::vector > > *active_pairs); - // This function modifies the lists of active matrices per segment - // (represented as pairs) in 'active_pairs' by sorting them and - // then subtracting the time-offset of the first pair in each - // list ((*active_pair)[seg][0].second), from all elements in that list. - // It puts the subtracted offset in (*time_offsets)[seg]. This change - // of representation makes it easy to tell whether the sets of active - // matrices for different segments are identical up to a time-offset. - static void NormalizePairLists( - std::vector > > *active_pairs, - std::vector *time_offsets); + // This function, used in FindFirstRepeat, tells us whether the two lists a + // and b are the same except for a possible time-shift. + // Each element of a or b is of the form (matrix-unique-index, time-offset). + // Let's suppose we have two pairs p1=(m1, o1) and p2=(m2, o2). + // For p2 to be equal to p1 except for a possible shift of value 'shift', we + // require m2 == m1 and either o2 == o1 + 'shift' or o2 == o1. + // This function returns true if a.size() == b.size() and for each + // i, b[i].first == a[i].first and b[i].second is either + // a[i].second or a[i].second + shift. + static bool ListsAreEqualExceptForPossibleShift( + const std::vector > &a, + const std::vector > &b, + int32 shift); // This function looks in the matrix 'active_pairs' for the first pair of // identical values, i.e. it is looking for i < j for which @@ -3376,18 +3379,23 @@ class ComputationLoopedOptimizer { // each segment should be shifted relative to the previous segment, by // 'time_shift_per_segment'. static bool FindFirstRepeat( - const std::vector > > &normalized_active_pairs, - const std::vector &time_offsets, + const std::vector > > &active_pairs, int32 time_shift_per_segment, int32 *seg1, int32 *seg2); - // Converts a list of pairs (e.g. one of the elements of the output of - // 'ConvertListsToPairLists)', back into a list of matrix indexes, using the - // map 'pair_to_matrix'. - static void PairListToMatrixList( - const std::vector > &pair_list, + + // 'pair_list1' is the list of active (unique-id, time-offset) pairs for one + // segment of the computation and 'pair_list2' is the same list for a later + // segment. The map 'pair_to_matrix' can convert these back into matrix + // indexes. This function will output two lists of matrices. These will just + // be 'pair_list1' and 'pair_list2' converted back into matrix indexes, + // except we omit pairs which are identical (i.e. 
the time-offset was zero). + static void GetIdentifiedMatrices( + const std::vector > &pair_list1, + const std::vector > &pair_list2, const unordered_map, int32, PairHasher > &pair_to_matrix, - std::vector *matrix_list); + std::vector *matrix_list1, + std::vector *matrix_list2); // This function just does some checking (via asserts), that @@ -3529,7 +3537,7 @@ int32 ComputationLoopedOptimizer::NormalizeCindexes( } if (iter == end) { // this should not happen. - KALDI_ERR << "All t value are kNoTime in matrix."; + KALDI_ERR << "All t values are kNoTime in matrix."; } iter = cindexes->begin(); for (; iter != end; iter++) @@ -3608,49 +3616,41 @@ void ComputationLoopedOptimizer::ConvertListsToPairLists( } // static -void ComputationLoopedOptimizer::NormalizePairLists( - std::vector > > *active_pairs, - std::vector *time_offsets) { - int32 num_segments = active_pairs->size(); - time_offsets->resize(num_segments); - for (int32 seg = 0; seg < num_segments; seg++) { - std::vector > &this_pairs = (*active_pairs)[seg]; - std::sort(this_pairs.begin(), this_pairs.end()); - int32 this_offset; - if (!this_pairs.empty()) { - this_offset = this_pairs[0].second; - } else { - // if this_pairs is empty, produce arbitrary offsets that are increasing - // (this will keep some self-testing code happy). - if (seg == 0) { this_offset = 0; } - else { this_offset = (*time_offsets)[seg - 1] + 1; } - } - (*time_offsets)[seg] = this_offset; - std::vector >::iterator - iter = this_pairs.begin(), end = this_pairs.end(); - for (; iter != end; ++iter) - iter->second -= this_offset; +bool ComputationLoopedOptimizer::ListsAreEqualExceptForPossibleShift( + const std::vector > &a, + const std::vector > &b, + int32 shift) { + size_t size = a.size(); + if (b.size() != size) + return false; + for (size_t i = 0; i < size; i++) { + const std::pair &p1 = a[i], + &p2 = b[i]; + if (p1.first != p2.first) + return false; + if (p2.second != p1.second + shift && p2.second != p1.second) + return false; } + return true; } - // static bool ComputationLoopedOptimizer::FindFirstRepeat( - const std::vector > > &normalized_active_pairs, - const std::vector &time_offsets, + const std::vector > > &active_pairs, int32 time_shift_per_segment, int32 *seg1, int32 *seg2) { - int32 num_segments = normalized_active_pairs.size(); + int32 num_segments = active_pairs.size(); // This algorithm may seem like it would be very slow, but the number of // segments will normally be quite small (e.g. 10), and the comparison of - // elements of 'normalized_active_pairs' should be fast in cases where they + // elements of 'active_pairs' should be fast in cases where they // differ. 
KALDI_ASSERT(num_segments >= 2); for (int32 s = 0; s < num_segments; s++) { for (int32 t = s + 1; t < num_segments; t++) { - if ((time_offsets[t]-time_offsets[s] == (t-s) * time_shift_per_segment) && - normalized_active_pairs[s] == normalized_active_pairs[t]) { + if (ListsAreEqualExceptForPossibleShift(active_pairs[s], + active_pairs[t], + (t - s) * time_shift_per_segment)) { *seg1 = s; *seg2 = t; return true; @@ -3661,22 +3661,35 @@ bool ComputationLoopedOptimizer::FindFirstRepeat( } // static -void ComputationLoopedOptimizer::PairListToMatrixList( - const std::vector > &pair_list, +void ComputationLoopedOptimizer::GetIdentifiedMatrices( + const std::vector > &pair_list1, + const std::vector > &pair_list2, const unordered_map, int32, PairHasher > &pair_to_matrix, - std::vector *matrix_list) { - matrix_list->resize(pair_list.size()); + std::vector *matrix_list1, + std::vector *matrix_list2) { + size_t size = pair_list1.size(); + KALDI_ASSERT(pair_list2.size() == size); + matrix_list1->clear(); + matrix_list2->clear(); + matrix_list1->reserve(size); + matrix_list2->reserve(size); std::vector >::const_iterator - iter = pair_list.begin(), end = pair_list.end(); - std::vector::iterator out_iter = matrix_list->begin(); - for (; iter != end; ++iter, ++out_iter) { + iter1 = pair_list1.begin(), end1 = pair_list1.end(), + iter2 = pair_list2.begin(); + for (; iter1 != end1; ++iter1, ++iter2) { + if (iter1->second == iter2->second) + continue; + // skip those that have no time shift, we won't have to do any swapping for + // those. unordered_map, int32, PairHasher >::const_iterator - map_iter = pair_to_matrix.find(*iter); - if (map_iter == pair_to_matrix.end()) { + map_iter1 = pair_to_matrix.find(*iter1), + map_iter2 = pair_to_matrix.find(*iter2); + if (map_iter1 == pair_to_matrix.end() || + map_iter2 == pair_to_matrix.end()) KALDI_ERR << "Could not find pair in map (code error)"; - } - *out_iter = map_iter->second; + matrix_list1->push_back(map_iter1->second); + matrix_list2->push_back(map_iter2->second); } } @@ -3895,7 +3908,7 @@ bool ComputationLoopedOptimizer::Optimize() { std::vector > matrix_to_pair; CreateMatrixPairs(*computation_, &matrix_to_pair); - // Create the reverse map from pair to matrix index; we'll need it. + // Create the reverse map from pair to matrix index; we'll need it later. unordered_map, int32, PairHasher > pair_to_matrix; GetPairToMatrixMap(matrix_to_pair, &pair_to_matrix); @@ -3904,34 +3917,25 @@ bool ComputationLoopedOptimizer::Optimize() { ConvertListsToPairLists(active_matrices, matrix_to_pair, &pair_lists); - std::vector time_offsets; - NormalizePairLists(&pair_lists, &time_offsets); - // Note: seg1 and seg2 are indexes into 'splice_points', representing // potential splice points (located near the beginnings of segments). int32 seg1, seg2; if (!FindFirstRepeat(pair_lists, - time_offsets, time_shift_per_segment, &seg1, &seg2)) { KALDI_VLOG(2) << "Could not find repeats of variables."; return false; } - // reverse the normalization for segments seg1 and seg2. 
- for (size_t i = 0; i < pair_lists[seg1].size(); i++) - pair_lists[seg1][i].second += time_offsets[seg1]; - for (size_t i = 0; i < pair_lists[seg2].size(); i++) - pair_lists[seg2][i].second += time_offsets[seg2]; std::vector seg1_matrices, seg2_matrices; - PairListToMatrixList(pair_lists[seg1], pair_to_matrix, &seg1_matrices); - PairListToMatrixList(pair_lists[seg2], pair_to_matrix, &seg2_matrices); + GetIdentifiedMatrices(pair_lists[seg1], pair_lists[seg2], + pair_to_matrix, + &seg1_matrices, &seg2_matrices); - int32 time_difference = time_offsets[seg2] - time_offsets[seg1]; + int32 time_difference = time_shift_per_segment * (seg2 - seg1); CheckIdentifiedMatrices(*computation_, seg1_matrices, seg2_matrices, time_difference); - FormInfiniteLoop(splice_points[seg1], splice_points[seg2], computation_); AddMatrixSwapCommands(seg1_matrices, seg2_matrices, computation_); diff --git a/src/nnet3/nnet-simple-component.cc b/src/nnet3/nnet-simple-component.cc index da19b477337..f1e47b2794b 100644 --- a/src/nnet3/nnet-simple-component.cc +++ b/src/nnet3/nnet-simple-component.cc @@ -2670,8 +2670,8 @@ std::string NaturalGradientAffineComponent::Info() const { PrintParameterStats(stream, "bias", bias_params_, true); stream << ", rank-in=" << rank_in_ << ", rank-out=" << rank_out_ - << ", num_samples_history=" << num_samples_history_ - << ", update_period=" << update_period_ + << ", num-samples-history=" << num_samples_history_ + << ", update-period=" << update_period_ << ", alpha=" << alpha_; return stream.str(); } @@ -5375,7 +5375,8 @@ std::string BatchNormComponent::Info() const { std::ostringstream stream; stream << Type() << ", dim=" << dim_ << ", block-dim=" << block_dim_ << ", epsilon=" << epsilon_ << ", target-rms=" << target_rms_ - << ", count=" << count_; + << ", count=" << count_ + << ", test-mode=" << (test_mode_ ? "true" : "false"); if (count_ > 0) { Vector mean(stats_sum_), var(stats_sumsq_); mean.Scale(1.0 / count_); diff --git a/tools/extras/install_mmseg.sh b/tools/extras/install_mmseg.sh index b931b93674f..586740b5cbc 100755 --- a/tools/extras/install_mmseg.sh +++ b/tools/extras/install_mmseg.sh @@ -39,6 +39,7 @@ if [ -d ./mmseg-1.3.0 ] ; then echo >&2 "$0: Warning: old installation of mmseg found. You should manually" echo >&2 " delete the directory tools/mmseg and " echo >&2 " edit the file tools/env.sh and remove manually all references to it" + exit 1 fi if [ ! -d ./mmseg-1.3.0 ] ; then @@ -46,22 +47,39 @@ if [ ! -d ./mmseg-1.3.0 ] ; then tar xf mmseg-1.3.0.tar.gz fi -pyver=`python --version 2>&1 | sed -e 's:.*\([2-3]\.[0-9]\+\).*:\1:g'` -export PYTHONPATH=$PYTHONPATH:`pwd`/mmseg-1.3.0/lib/python${pyver}/site-packages +( cd mmseg-1.3.0 -mkdir -p lib/python${pyver}/site-packages +pyver=`python --version 2>&1 | sed -e 's:.*\([2-3]\.[0-9]\+\).*:\1:g'` +export PYTHONPATH=$PYTHONPATH:$PWD/lib/python${pyver}/site-packages/:$PWD/lib64/python${pyver}/site-packages/ +# we have to create those dir, as the install target does not create it +mkdir -p $PWD/lib/python${pyver}/site-packages/ +mkdir -p $PWD/lib64/python${pyver}/site-packages/ python setup.py build python setup.py install --prefix `pwd` -cd ../ - -( - set +u - pyver=`python --version 2>&1 | sed -e 's:.*\([2-3]\.[0-9]\+\).*:\1:g'` - wd=`pwd` +) - [ -f ./env.sh ] && . 
./env.sh +## we first find the mmseg.py file (the module name which will be imported, +## so that should be pretty reliable) and then we work out the location of +## the site-packages directory (typically it would be one level up from +## the location of the mmseg.py file but using find seems more reliable +mmseg_file_lib=$(find ./mmseg-1.3.0/lib/ -type f -name mmseg.py | head -n1) +mmseg_file_lib64=$(find ./mmseg-1.3.0/lib64/ -type f -name mmseg.py | head -n1) +if [ ! -z ${mmseg_file_lib+x} ]; then + lib_dir=./lib/ +elif [ ! -z ${mmseg_file_lib64+x} ]; then + lib_dir=./lib64/ +else + echo >&2 "$0: ERROR: Didn't find ./mmseg-1.3.0/lib/ or ./mmseg-1.3.0/lib64/" + echo >&2 " Perhaps your python or system installs python modules into" + echo >&2 " a different dir or some other unknown issues arised. Review the output" + echo >&2 " of the script and try to figure out what went wrong." + exit 1 +fi - echo "export PYTHONPATH=\$PYTHONPATH:$wd/mmseg-1.3.0/lib/python${pyver}/site-packages" +site_packages_dir=$(cd ./mmseg-1.3.0; find $lib_dir -name "site-packages" -type d | head -n1) +( + echo "export MMSEG=\"$PWD/mmseg-1.3.0\"" + echo "export PYTHONPATH=\"\${PYTHONPATH:-}:\$MMSEG/${site_packages_dir}\"" ) >> env.sh echo >&2 "Installation of mmseg finished successfully" diff --git a/tools/extras/install_sequitur.sh b/tools/extras/install_sequitur.sh index ba6d028edad..f14057bb494 100755 --- a/tools/extras/install_sequitur.sh +++ b/tools/extras/install_sequitur.sh @@ -11,7 +11,6 @@ fi ! [ `basename $PWD` == tools ] && \ echo "You must call this script from the tools/ directory" && exit 1; - # Install python-devel package if not already available # first, makes sure distutils.sysconfig usable if ! $(python -c "import distutils.sysconfig" &> /dev/null); then @@ -46,6 +45,7 @@ if [ -d ./g2p ] || [ -d sequitur ] ; then echo >&2 "$0: Warning: old installation of Sequitur found. You should manually" echo >&2 " delete the directories tools/sequitur and/or tools/g2p and " echo >&2 " edit the file tools/env.sh and remove manually all references to it" + exit 1 fi if [ ! -d ./sequitur-g2p ] ; then @@ -67,13 +67,12 @@ fi #in a couple of months. ln -sf sequitur-g2p sequitur - +( cd sequitur-g2p make CXX=g++ CC=gcc python setup.py install --prefix `pwd` - -cd ../ - +) +site_packages_dir=$(cd sequitur-g2p; find ./lib{,64} -type d -name site-packages | head -n 1) ( set +u [ ! -z "${SEQUITUR}" ] && \ @@ -88,10 +87,9 @@ cd ../ wd=`pwd` wd=`readlink -f $wd || pwd` - echo "export SEQUITUR=$wd/sequitur-g2p" - echo "export PATH=\$PATH:\${SEQUITUR}/bin" - echo "_site_packages=\`find \${SEQUITUR}/lib -type d -regex '.*python.*/site-packages'\`" - echo "export PYTHONPATH=\${PYTHONPATH:-}:\$_site_packages" + echo "export SEQUITUR=\"$wd/sequitur-g2p\"" + echo "export PATH=\"\$PATH:\${SEQUITUR}/bin\"" + echo "export PYTHONPATH=\"\${PYTHONPATH:-}:\$SEQUITUR/${site_packages_dir}\"" ) >> env.sh echo >&2 "Installation of SEQUITUR finished successfully"
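
Note on the env.sh handling above (illustrative sketch only, not part of the patch): in the new install_sequitur.sh, ${site_packages_dir} is whatever directory the find command locates under sequitur-g2p/lib or lib64, so the lines appended to tools/env.sh would come out roughly like the following; the exact path (lib vs. lib64, python version) depends on the local Python installation and the "/path/to/kaldi" prefix is a placeholder:

    export SEQUITUR="/path/to/kaldi/tools/sequitur-g2p"
    export PATH="$PATH:${SEQUITUR}/bin"
    export PYTHONPATH="${PYTHONPATH:-}:$SEQUITUR/./lib/python2.7/site-packages"

The revised install_mmseg.sh follows the same pattern, exporting MMSEG and appending the discovered site-packages directory under mmseg-1.3.0 to PYTHONPATH, rather than hard-coding the path as before.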