Skip to content

Commit

Permalink
Fix kill fail bug (#11635)
Browse files Browse the repository at this point in the history
* 1. Remove PYTHON_FLAGS from paddle_build.sh in paddlepaddle/paddle:latest-dev

* 1. Add PYTHON_FLAGS back
2. Change SIGKILL to SIGINT and SIGTERM

* 1. Add setup.py.in back

* 1. Add `pip install opencv-python` to the Dockerfile to avoid libusb_exit hanging, which is caused by the opencv-python package being missing

* 1. Add the `&& \` to the line above

* 1. Remove the notice comment
  • Loading branch information
velconia authored and typhoonzero committed Jun 22, 2018
1 parent dbca7f1 commit dda24f1
Show file tree
Hide file tree
Showing 4 changed files with 15 additions and 15 deletions.
3 changes: 2 additions & 1 deletion Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -76,7 +76,8 @@ RUN easy_install -U pip && \
pip install sphinx-rtd-theme==0.1.9 recommonmark

RUN pip install pre-commit 'ipython==5.3.0' && \
pip install 'ipykernel==4.6.0' 'jupyter==1.0.0'
pip install 'ipykernel==4.6.0' 'jupyter==1.0.0' && \
pip install opencv-python

#For docstring checker
RUN pip install pylint pytest astroid isort
Expand Down
18 changes: 9 additions & 9 deletions paddle/scripts/paddle_build.sh
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@
function print_usage() {
echo -e "\n${RED}Usage${NONE}:
${BOLD}${SCRIPT_NAME}${NONE} [OPTION]"

echo -e "\n${RED}Options${NONE}:
${BLUE}build${NONE}: run build for x86 platform
${BLUE}build_android${NONE}: run build for android platform
Expand Down Expand Up @@ -198,7 +198,7 @@ function build_android() {
fi

ANDROID_STANDALONE_TOOLCHAIN=$ANDROID_TOOLCHAINS_DIR/$ANDROID_ARCH-android-$ANDROID_API

cat <<EOF
============================================
Generating the standalone toolchain ...
Expand All @@ -212,13 +212,13 @@ EOF
--arch=$ANDROID_ARCH \
--platform=android-$ANDROID_API \
--install-dir=$ANDROID_STANDALONE_TOOLCHAIN

BUILD_ROOT=${PADDLE_ROOT}/build_android
DEST_ROOT=${PADDLE_ROOT}/install_android

mkdir -p $BUILD_ROOT
cd $BUILD_ROOT

if [ $ANDROID_ABI == "armeabi-v7a" ]; then
cmake -DCMAKE_SYSTEM_NAME=Android \
-DANDROID_STANDALONE_TOOLCHAIN=$ANDROID_STANDALONE_TOOLCHAIN \
Expand Down Expand Up @@ -286,7 +286,7 @@ function build_ios() {
-DWITH_TESTING=OFF \
-DWITH_SWIG_PY=OFF \
-DCMAKE_BUILD_TYPE=Release

make -j 2
}

Expand Down Expand Up @@ -331,14 +331,14 @@ EOF
function bind_test() {
# the number of processes used to run tests
NUM_PROC=6

# calculate and set the memory usage for each process
MEM_USAGE=$(printf "%.2f" `echo "scale=5; 1.0 / $NUM_PROC" | bc`)
export FLAGS_fraction_of_gpu_memory_to_use=$MEM_USAGE

# get the CUDA device count
CUDA_DEVICE_COUNT=$(nvidia-smi -L | wc -l)

for (( i = 0; i < $NUM_PROC; i++ )); do
cuda_list=()
for (( j = 0; j < $CUDA_DEVICE_COUNT; j++ )); do
Expand Down
5 changes: 2 additions & 3 deletions python/paddle/fluid/tests/unittests/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ if(NOT WITH_DISTRIBUTE)
endif(NOT WITH_DISTRIBUTE)

list(REMOVE_ITEM TEST_OPS test_seq_concat_op) # FIXME(helin): https://github.com/PaddlePaddle/Paddle/issues/8290
list(REMOVE_ITEM TEST_OPS test_modified_huber_loss_op) # FIXME(qijun) https://github.com/PaddlePaddle/Paddle/issues/5184
list(REMOVE_ITEM TEST_OPS test_modified_huber_loss_op) # FIXME(qijun) https://github.com/PaddlePaddle/Paddle/issues/5184
list(REMOVE_ITEM TEST_OPS test_lstm_unit_op) # # FIXME(qijun) https://github.com/PaddlePaddle/Paddle/issues/5185
list(REMOVE_ITEM TEST_OPS test_nce) # FIXME(qijun) https://github.com/PaddlePaddle/Paddle/issues/7778
list(REMOVE_ITEM TEST_OPS test_recurrent_op) # FIXME(qijun) https://github.com/PaddlePaddle/Paddle/issues/6152
Expand Down Expand Up @@ -43,12 +43,11 @@ list(REMOVE_ITEM TEST_OPS test_warpctc_op)
list(REMOVE_ITEM TEST_OPS test_dist_train)
list(REMOVE_ITEM TEST_OPS test_parallel_executor_crf)
list(REMOVE_ITEM TEST_OPS test_parallel_executor_fetch_feed)
# TODO(wuyi): this test hangs on CI, will add it back later
list(REMOVE_ITEM TEST_OPS test_listen_and_serv_op)
foreach(TEST_OP ${TEST_OPS})
py_test_modules(${TEST_OP} MODULES ${TEST_OP})
endforeach(TEST_OP)
py_test_modules(test_warpctc_op MODULES test_warpctc_op ENVS FLAGS_warpctc_dir=${WARPCTC_LIB_DIR} SERIAL)
py_test_modules(test_dist_train MODULES test_dist_train SERIAL)
py_test_modules(test_parallel_executor_crf MODULES test_parallel_executor_crf SERIAL)
py_test_modules(test_parallel_executor_fetch_feed MODULES test_parallel_executor_fetch_feed SERIAL)
set_tests_properties(test_listen_and_serv_op PROPERTIES TIMEOUT 20)
Original file line number Diff line number Diff line change
Expand Up @@ -94,15 +94,15 @@ def test_handle_signal_in_serv_op(self):
self._wait_ps_ready(p1.pid)

# raise SIGINT to pserver
os.kill(p1.pid, signal.SIGKILL)
os.kill(p1.pid, signal.SIGINT)
p1.join()

# run pserver on CPU in async mode
p2 = self._start_pserver(False, False)
self._wait_ps_ready(p2.pid)

# raise SIGTERM to pserver
os.kill(p2.pid, signal.SIGKILL)
os.kill(p2.pid, signal.SIGTERM)
p2.join()


Expand Down

0 comments on commit dda24f1

Please sign in to comment.