Fix broken port-audio install for Kaldi and protobuf for Tensor (#282)

* fix broken port-audio install for Kaldi
* reformat dockerfile for kaldi install troubleshooting, and port-audio
* pin protobuf
* nicer fix for PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION error
CoEDL · Jun 9, 2022 · 8384147 · 8384147
1 parent afa94ce
commit 8384147
Show file tree

Hide file tree

Showing 2 changed files with 13 additions and 9 deletions.
diff --git a/Dockerfile b/Dockerfile
@@ -7,7 +7,7 @@ FROM ubuntu:20.04
 
 ########################## BEGIN INSTALLATION #########################
 
-ENV NUM_CPUS=12
+ENV NUM_CPUS=6
 
 ENV TZ=UTC
 
@@ -91,15 +91,16 @@ RUN echo "===> Install Kaldi dependencies" && \
 WORKDIR /
 
 RUN echo "===> Install Kaldi (pinned at version 5.3)"  && \
-    git clone -b 5.3 https://github.com/kaldi-asr/kaldi && \
-    cd /kaldi/tools && \
-    make -j$NUM_CPUS && \
-    ./install_portaudio.sh && \
-    cd /kaldi/src && ./configure --mathlib=ATLAS --shared  && \
+    git clone -b 5.3 https://github.com/kaldi-asr/kaldi
+
+COPY deps/pa_stable_v19_20111121.tgz /kaldi/tools/pa_stable_v19_20111121.tgz
+
+RUN cd /kaldi/tools && make -j$NUM_CPUS && ./install_portaudio.sh
+RUN cd /kaldi/src && ./configure --mathlib=ATLAS --shared && \
     sed -i '/-g # -O0 -DKALDI_PARANOID/c\-O3 -DNDEBUG' kaldi.mk && \
-    make depend -j$NUM_CPUS && make -j$NUM_CPUS && \
-    cd /kaldi/src/online2 && make depend -j$NUM_CPUS && make -j$NUM_CPUS && \
-    cd /kaldi/src/online2bin && make depend -j$NUM_CPUS && make -j$NUM_CPUS
+    make depend -j$NUM_CPUS && make -j$NUM_CPUS
+RUN cd /kaldi/src/online2 && make depend -j$NUM_CPUS && make -j$NUM_CPUS
+RUN cd /kaldi/src/online2bin && make depend -j$NUM_CPUS && make -j$NUM_CPUS
 
 COPY deps/srilm-1.7.2.tar.gz /kaldi/tools/srilm.tgz
 
@@ -160,10 +161,13 @@ RUN pip install --upgrade pip
 # Install deps using pip rather than poetry mainly because poetry doesn't have -f support for the +cu111 version details
 # Override the dep info from requirements.txt so that we can specify CUDA version
 # Pin transformers to 4.6.0 because the model class has args code which breaks on later versions
+# Pin protobuf to fix `PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION` error
+RUN pip install protobuf==3.20.*
 RUN pip install transformers==4.6.0 datasets jiwer==2.2.0 lang-trans==0.6.0 librosa==0.8.0
 # Set torch version for CUDA 11
 RUN pip install torch==1.9.0+cu111 torchvision==0.10.0+cu111 torchaudio==0.9.0 -f https://download.pytorch.org/whl/torch_stable.html
 RUN pip install tensorboard==2.7.0
+
 # Cache the pretrained models
 COPY download_wav2vec2.py /root/download_wav2vec2.py
 RUN python /root/download_wav2vec2.py

diff --git a/deps/pa_stable_v19_20111121.tgz b/deps/pa_stable_v19_20111121.tgz